Пример #1
0
def predict(path, weightPath1, weightPath2, method, limit=500,
            predictionPath="predictions.txt"):
    """Run the trained network over every candidate sequence and save the hits.

    The two weight arrays are loaded with ``np.load``. When ``method`` is
    ``'fasta'`` (case-insensitive), the proteins are read from ``path`` with
    ``fileUtils.readFasta``; each protein is split into candidate sequences by
    ``sequenceUtils.openReadingFrames``; every sequence is encoded, passed
    through ``forward`` and back-transformed with ``logTransform.invTransform``.
    A sequence whose back-transformed output is at or below ``limit`` is
    recorded as ``[protein number, position]``, both one-indexed.

    The recorded pairs are written tab-separated to ``predictionPath`` and the
    number of predictions is printed. Nothing is returned.

    Parameters
    ----------
    path : path of the input file, handed to ``fileUtils.readFasta``.
    weightPath1, weightPath2 : paths of the saved weight arrays.
    method : input format; only ``'fasta'`` is handled. Any other value
        produces no predictions, but the (empty) output file is still written.
    limit : upper bound, inclusive, on the back-transformed network output for
        a sequence to be reported. Defaults to 500, the value that used to be
        hard-coded.
    predictionPath : where the predictions are written. Defaults to
        ``"predictions.txt"``, the path that used to be hard-coded.
    """
    # Load the trained weights.
    weight1 = np.load(weightPath1)
    weight2 = np.load(weightPath2)

    predictions = []

    if method.lower() == 'fasta':
        proteins = fileUtils.readFasta(path)

        # start=1 because the reported protein numbers and positions are
        # one-indexed, while the underlying containers are zero-indexed.
        for proteinId, protein in enumerate(proteins, start=1):
            sequences = sequenceUtils.openReadingFrames(protein)

            for pos, sequence in enumerate(sequences, start=1):
                # Convert the sequence to its binary input encoding.
                inputLayer = sequenceUtils.createInputLayer(sequence)

                # Forward pass; element 1 of the result is the output layer.
                outputLayer = forward(inputLayer, weight1, weight2)[1]
                # NOTE(review): presumably undoes a log transform applied to
                # the training targets -- confirm against logTransform.
                outputLayer = logTransform.invTransform(outputLayer)

                if outputLayer <= limit:
                    predictions.append([proteinId, pos])

    np.savetxt(predictionPath, np.array(predictions), fmt='%d', delimiter='\t')
    print("There is {} predicted epitopes.".format(len(predictions)))
Пример #2
0
from forward import forward


# Fix the random seed so that every run gives the same results.
np.random.seed(1234)

limit = 500
syfLimit = 21
names = np.array(["gag", "pol", "vif", "vpr", "tat", "rev", "vpu", "env", "nef"])

# MHC epitopes: keep the sequences whose affinity is at or below the limit.
mhcSequences, mhcAffinities = fileUtils.readHLA("data/mhcSequences.txt")
mhcEpitopes = mhcSequences[mhcAffinities <= limit]

# Complete set of HIV coding sequences.
hivProteins = fileUtils.readFasta("data/hivCodingSequences.txt")


# SMMPMBEC: read columns 0 to 3 of the results file, in that order.
smm0, smm1, smm2, smm3 = (
    fileUtils.readColumn("data/smmpmbec.csv", column, True) for column in range(4)
)
# Keep only the rows whose column-3 value is at or below the limit.
index = smm3 <= limit
smm = [smm2[index], np.zeros(np.count_nonzero(index), dtype=int), smm1[index]]
# Replace each name in column 0 with its one-indexed position in `names`.
selectedNames = smm0[index]
for number, name in enumerate(names, start=1):
    smm[1][selectedNames == name] = number