def adjustOpticalMap(contigMapFile, opticalMapFileOrig, opticalMapFileNew):
    """Adjust an optical map using high-quality, unique contig alignments.

    Steps:
      1. Align contigMapFile against opticalMapFileOrig via makeAlignments.
      2. Keep only matches that pass the hit-count / miss-rate thresholds.
      3. Keep only matches whose contig has exactly one such good match.
      4. Write those matches to '<outputPfx>.matchedChunks' and hand that
         file to adjustOpticalMaps.run.

    Progress messages are written to stderr.

    Args:
        contigMapFile: contig map file passed to makeAlignments.
        opticalMapFileOrig: optical map passed to makeAlignments and to
            adjustOpticalMaps.run.
        opticalMapFileNew: third argument of adjustOpticalMaps.run
            (presumably the output path of the adjusted map -- confirm
            against adjustOpticalMaps).
    """
    # Perform an initial alignment of the contigMapFile against the
    # opticalMapFile.
    # NOTE(review): opticalMapFileOrig is embedded verbatim in the prefix; if
    # it contains directory separators the prefix will too. Left unchanged
    # because existing callers may depend on the resulting file names.
    outputPfx = '%s.%s.alignForAdjustment' % (contigMapFile, opticalMapFileOrig)
    res = makeAlignments(opticalMapFileOrig, contigMapFile, outputPfx)
    xmlFile = res['xmlFile']
    ml = parseSomaMatch.parseMatchFileXML(xmlFile)
    sys.stderr.write('adjustOpticalMap: Parsed %i matches from file %s\n'
                     % (len(ml), xmlFile))

    # Filter the match list on quality.
    minHits = 10
    maxMissRate = 0.10

    def matchOK(mr):
        """Return True if mr has enough contig hits and a low enough miss rate."""
        if mr.contigHits < minHits:
            return False
        # The worse of the two miss rates must be within the threshold.
        return max(mr.contigMissRate, mr.opticalMissRate) <= maxMissRate

    goodMatches = [mr for mr in ml if matchOK(mr)]
    # Trailing newline added so successive log lines do not run together.
    sys.stderr.write('adjustOpticalMap: Filtered to %i matches based on quality\n'
                     % len(goodMatches))

    # Count the number of good alignments per contig and keep only the
    # contigs that align exactly once.
    contigAlignmentCounts = Counter(mr.contigId for mr in goodMatches)
    uniqueAlignments = [mr for mr in goodMatches
                        if contigAlignmentCounts[mr.contigId] == 1]
    sys.stderr.write('adjustOpticalMap: Filtered to %i matches based on uniqueness\n'
                     % len(uniqueAlignments))

    # Create a matched chunk file and use it to adjust the optical map.
    matchedChunkFile = '%s.matchedChunks' % outputPfx
    adjustOpticalMaps.makeMatchedChunkFile(uniqueAlignments, matchedChunkFile)
    adjustOpticalMaps.run(opticalMapFileOrig, matchedChunkFile, opticalMapFileNew)
def parseRandomAlignments(xmlFile):
    """Parse a match XML file and collect its results into a dictionary.

    Args:
        xmlFile: path handed to parseSomaMatch.parseMatchFileXML.

    Returns:
        dict with three keys:
          'matchDict'   -- maps contigId to a match result; if several
                           results share a contigId, the last one parsed
                           is kept.
          'chunkScores' -- flat list of every item returned by
                           getChunkScores() across all match results.
          'scores'      -- list with the score of each match result.
    """
    # The file name is deliberately not inspected: an earlier, unused parse
    # of its second dot-separated field made the function raise on any path
    # that did not look like '<name>.<int>...'.
    matchResults = parseSomaMatch.parseMatchFileXML(xmlFile)

    return {
        'matchDict': dict((mr.contigId, mr) for mr in matchResults),
        'chunkScores': [chunkScore
                        for mr in matchResults
                        for chunkScore in mr.getChunkScores()],
        'scores': [mr.score for mr in matchResults],
    }
def postProcess(xmlFile, opticalMapFile, contigMapFile, outputPfx):
    """Post-process a match XML file: test significance, report, scaffold.

    Parses the matches in xmlFile, runs the significance test on them,
    selects significant (pval <= 0.05) and unique-significant matches, and
    writes the following files (all prefixed with outputPfx):

      .matchList.all.pickle / .matchList.sig.pickle /
      .matchList.sig.unique.pickle   -- pickled match lists
      .info                          -- via parseSomaMatch.writeInfoFile2
      .SigUniqueAlignments.txt / .AllSigAlignments.txt
      .scaffold_sig[Unique]Matches_{with,no}Overlaps.txt

    summarizeContigStatus.summarizeContigStatus is also called with
    outputPfx. Progress and summary counts go to stdout.

    Args:
        xmlFile: match XML file to parse.
        opticalMapFile: optical map file, read with SOMAMap.readMaps.
        contigMapFile: contig map file, read with SOMAMap.readMaps.
        outputPfx: prefix for every output file name.
    """
    def banner(title):
        # Blank line, a bar of 50 asterisks, the title, another bar, and a
        # blank line.
        bar = '*' * 50
        sys.stdout.write('\n%s\n%s\n%s\n\n' % (bar, title, bar))

    # Parse results
    banner('Parsing SOMA OUTPUT...')

    pickleFileAll = '%s.matchList.all.pickle' % outputPfx
    pickleFileSig = '%s.matchList.sig.pickle' % outputPfx
    pickleFileSigUnique = '%s.matchList.sig.unique.pickle' % outputPfx

    # Parse match results and run the significance test on them.
    # numThreads is not a parameter of this function; it is looked up in the
    # enclosing module scope when this line runs.
    # presumably runSignificanceTest sets mr.pval on each match -- confirm.
    ml = parseSomaMatch.parseMatchFileXML(xmlFile)
    significanceTest.runSignificanceTest(ml, contigMapFile, opticalMapFile,
                                         numThreads=numThreads)

    # Select significant results
    pvalCutoff = 0.05
    sigMatches = [mr for mr in ml if mr.pval <= pvalCutoff]
    sigMatchDict = parseSomaMatch.collectMatchResultsByContig(sigMatches)
    # A contig is "unique" when it has exactly one significant match.
    sigUniqueMatches = [matches[0] for matches in sigMatchDict.values()
                        if len(matches) == 1]
    sigUniqueMatchDict = parseSomaMatch.collectMatchResultsByContig(sigUniqueMatches)

    sys.stdout.write('Found %i significant matches (%i bp)\n'
                     % (len(sigMatches),
                        sum(mr.cAlignedBases for mr in sigMatches)))
    sys.stdout.write('Found %i unique significant matches (%i bp)\n'
                     % (len(sigUniqueMatches),
                        sum(mr.cAlignedBases for mr in sigUniqueMatches)))

    # Pickle the matchResults. Each file is opened in a with-block so the
    # handle is flushed and closed even if pickling fails part-way.
    # Mode 'w' is kept from the original (default cPickle protocol is text).
    pickleJobs = (
        (ml, pickleFileAll),
        (sigMatches, pickleFileSig),
        (sigUniqueMatches, pickleFileSigUnique),
    )
    for matches, pickleFile in pickleJobs:
        with open(pickleFile, 'w') as pickleOut:
            cPickle.dump(matches, pickleOut)

    infoFileOut = '%s.info' % outputPfx
    parseSomaMatch.writeInfoFile2(ml, infoFileOut)

    # Summarize alignment status for contigs in the contigMapFile
    contigMapDict = SOMAMap.readMaps(contigMapFile)
    opMapDict = SOMAMap.readMaps(opticalMapFile)
    summarizeContigStatus.summarizeContigStatus(outputPfx, sigMatchDict, contigMapDict)

    # Print all of the alignments to text files
    with open('%s.SigUniqueAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigUniqueMatches, fout)
    with open('%s.AllSigAlignments.txt' % outputPfx, 'w') as fout:
        parseSomaMatch.printAlignments(sigMatches, fout)

    # Create scaffolds
    banner('Creating Scaffolds...')
    createScaffolds.createScaffolds(
        sigMatchDict, opMapDict,
        '%s.scaffold_sigMatches_withOverlaps.txt' % outputPfx,
        allowOverlaps=True)
    createScaffolds.createScaffolds(
        sigMatchDict, opMapDict,
        '%s.scaffold_sigMatches_noOverlaps.txt' % outputPfx,
        allowOverlaps=False)
    createScaffolds.createScaffolds(
        sigUniqueMatchDict, opMapDict,
        '%s.scaffold_sigUniqueMatches_withOverlaps.txt' % outputPfx,
        allowOverlaps=True)
    createScaffolds.createScaffolds(
        sigUniqueMatchDict, opMapDict,
        '%s.scaffold_sigUniqueMatches_noOverlaps.txt' % outputPfx,
        allowOverlaps=False)