Python SeqGenUtils.fasta_read示例

编程语言: Python

类/类型: SeqGenUtils

方法/功能: fasta_read

hotexamples.com的示例: 6

Python SeqGenUtils.fasta_read - 已找到6个示例。这些是从开源项目中提取的、评分最高的 SeqGenUtils.fasta_read 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

fasta_read(6)

findFiles(2)

writeFastaLinesToFile(2)

createFastaFileFromKmers(1)

weightedchoice(1)

示例#1

显示文件

文件： compareKmerCommon.py 项目： shwetabhandare/PySG

def GetTotalNumbers(realKmerDict, posFile, negFile, pssmList, pwm, predictedMotifs, predictedKmers=None):
	"""Combine the TP / FP / FN counts of a positive and a negative FASTA file.

	Both files are loaded with SeqGenUtils.fasta_read, and each loaded
	result is scored by getTotalNumbersForSeqDict using the same
	realKmerDict, pssmList, pwm, predictedMotifs and predictedKmers.

	Returns a 3-tuple (TP, FP, FN) in which every entry is the positive-file
	count plus the negative-file count.
	"""
	PosSeqDict = SeqGenUtils.fasta_read(posFile)
	NegSeqDict = SeqGenUtils.fasta_read(negFile)

	# The counters are bound directly by the unpacking below, so no
	# zero-initialisation is needed (the old one also named numNegFP twice
	# and never created numNegFN).
	numPosTP, numPosFP, numPosFN = getTotalNumbersForSeqDict(
		PosSeqDict, realKmerDict, pssmList, pwm, predictedMotifs, predictedKmers)
	numNegTP, numNegFP, numNegFN = getTotalNumbersForSeqDict(
		NegSeqDict, realKmerDict, pssmList, pwm, predictedMotifs, predictedKmers)

	return (numPosTP + numNegTP), (numPosFP + numNegFP), (numPosFN + numNegFN)

示例#2

显示文件

文件： Kmer.py 项目： shwetabhandare/PySG

def GetKmersFromStructureFile(structureFile, numSeqsWithSignal):
	"""Build the k-mer collection derived from a structure FASTA file.

	The file is read with SeqGenUtils.fasta_read, passed through
	SeqGenUtils.ChangeUsToTs (presumably a U -> T conversion; confirm in
	SeqGenUtils), and the values of the result are handed to
	CreateLengthMatchingKmers together with numSeqsWithSignal.

	Returns whatever CreateLengthMatchingKmers returns.
	"""
	converted = SeqGenUtils.ChangeUsToTs(SeqGenUtils.fasta_read(structureFile))
	return CreateLengthMatchingKmers(converted.values(), numSeqsWithSignal)

示例#3

显示文件

文件： pyngram.py 项目： shwetabhandare/PySG

def ComputeNgramFrequencyAndProbability(seqFile, nLen):

    seqDict = SeqGenUtils.fasta_read(seqFile);
    nGramCombinedList = list();
    total_seq_length = 0;

    for header, sequence in seqDict.iteritems():
        ngram_list = getNgramListForSeq(sequence, nLen)
        if len(ngram_list) > 0:
            nGramCombinedList.append(ngram_list)
            total_seq_length = total_seq_length + len(sequence)
        else: 
            print "Found empty sequence for ", header;

    ngram_freq = getNGramFreqForCombinedList(nGramCombinedList, nLen);
    ngram_freq = dict(ngram_freq)
    ngram_prob = {}

    for ngram, frequency in ngram_freq.iteritems():
        ngram_prob[ngram] = round(frequency/total_seq_length, 4);

    # print str(nLen) + "-" + "gram frequences: \n", ngram_freq;
    #print str(nLen) + "-" + "gram probabilities: \n", ngram_prob;
    #print "Total Nucleotides: ", str(total_seq_length) 

    return ngram_freq, ngram_prob;

示例#4

显示文件

文件： test_generate_yaml.py 项目： shwetabhandare/PySG

	def test_DictFromFasta(self):
		"""Check fasta_read and ChangeUsToTs against the RF000037 fixture.

		The loaded mapping must hold 62 entries, none containing 'T'; after
		SeqGenUtils.ChangeUsToTs it must still hold 62 entries and every
		one must contain 'T'.
		"""
		fastaPath = "/projects/bhandare/workspace/PySG/src/resources/RF000037.fa"

		rawSeqs = SeqGenUtils.fasta_read(fastaPath)
		self.assertEqual(len(rawSeqs), 62)
		for _, seq in rawSeqs.iteritems():
			self.assertFalse('T' in seq)

		convertedSeqs = SeqGenUtils.ChangeUsToTs(rawSeqs)
		self.assertEqual(len(convertedSeqs), 62)
		for _, seq in convertedSeqs.iteritems():
			self.assertTrue('T' in seq)

示例#5

显示文件

文件： dremeSequenceBasedComparison.py 项目： shwetabhandare/PySG

def computeSequenceBasedDREMEResults(dremeFile, realCsvFile, posSeqFile, negSeqFile):
	totalPosTP = totalPosFP = totalPosFN = totalPosTN = 0;
	totalNegTP = totalNegFP = totalNegFN = totalNegTN = 0;

	posSeqDict  = SeqGenUtils.fasta_read(posSeqFile);
	negSeqDict  = SeqGenUtils.fasta_read(negSeqFile);
	realKmerDict = parseRealKmers.GetRealKmerDict(realCsvFile);
	pssmList = parseDreme.getPSSMListFromDremeFile(dremeFile)

	numPosTP, numPosFP, numPosFN, numPosTN = compareRealAndPredicted(realKmerDict, posSeqDict, pssmList, positive=True)
	numNegTP, numNegFP, numNegFN, numNegTN = compareRealAndPredicted(realKmerDict, negSeqDict, pssmList, False)

	print "Positive: TP: ", numPosTP, ", FP: ", numPosFP, ", FN: ", numPosFN, ", TN: ", numPosTN
	print "Negative: TP: ", numNegTP, ", FP: ", numNegFP, ", FN: ", numNegFN, ", TN: ", numNegTN

	totalPos = len(posSeqDict)
	totalNeg = len(negSeqDict)

	sensitivity, ppv = compareKmers.GetSensitivityAndPPV((numPosTP + numNegTP) , (numPosFP + numNegFP), (numPosFN + numNegFN))
	accuracy = compareKmers.GetAccuracy( (numPosTP + numNegTP), (numPosTN + numNegTN),  (totalPos + totalNeg) )
	specificity = compareKmers.GetSpecificity( (numPosFP + numNegFP), totalNeg);

	print "Senitivity: ", sensitivity, ", PPV: ", ppv, ", Accuracy: ", accuracy, ", Specificity: ", specificity;
	return sensitivity, ppv;

示例#6

显示文件

文件： MotifAnalysis.py 项目： shwetabhandare/PySG

					motifCountDict[motif] = 1;

		if motifFound == False:
			totalMisMatches = totalMisMatches + 1;

	print "Total Mismatches: ", totalMisMatches;
	return motifMatchDict, motifCountDict;

# Script entry point: reports how many sequences of a FASTA file match
# exactly one motif and how many match more than one.
# Usage: <script> <motifFile> <faFile>
if __name__ == "__main__":	
	import sys
	# Positional arguments: motif file first, FASTA file second.
	motifFile = sys.argv[1]
	faFile = sys.argv[2]

	motifList = getMotifList(motifFile)
	print motifList
	seqDict  = SeqGenUtils.fasta_read(faFile)
	motifMatchDict, motifCountDict = findMotifInSequences(motifList, seqDict)

	# Tally motifMatchDict entries by how many items each one holds;
	# entries with no items are counted in neither bucket.
	singleMotif = 0
	multipleMotif = 0;
	for header, value in motifMatchDict.iteritems():
		if len(value) > 1:
			multipleMotif = multipleMotif + 1;
		if len(value) == 1:
			singleMotif = singleMotif + 1;

	print "Sequences that matches single motif: ", singleMotif;
	print "Sequences that matches multiple motifs: ", multipleMotif;
	print "Total Sequences: ", len(seqDict)

	# for item in sorted(motifMatchDict):