def removeRedundantRefvsQuery(folderName, mummerLink, fileR, fileQ, outputFileName):
    """Remove query sequences that are redundant w.r.t. the reference set.

    Aligns fileR (reference) against fileQ (query) with MUMmer, then marks a
    query sequence as redundant when the alignment covers almost the whole
    query (within thres bp of its full length).  Surviving query names are
    written out via IORobot.putListToFileO.

    folderName     : working directory (expected to end with a path separator)
    mummerLink     : path to the MUMmer binaries
    fileR, fileQ   : reference / query fasta filenames inside folderName
    outputFileName : destination file for the non-redundant query sequences
    """
    thres = 10  # max unmatched bp for a query to still count as fully covered
    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["redundantRvsQ", fileR, fileQ, ""]], houseKeeper.globalParallel)
    dataList = alignerRobot.extractMumData(folderName, "redundantRvsQOut")
    lenDicR = IORobot.obtainLength(folderName, fileR)
    lenDicQ = IORobot.obtainLength(folderName, fileQ)
    isRedundantList = []
    for eachitem in dataList:
        # Fields 4/5 appear to be ref/query match lengths and 7/8 the
        # ref/query names -- consistent with their use elsewhere in this file.
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
        l1, l2 = lenDicR[name1], lenDicQ[name2]
        if abs(l2 - match2) < thres:
            isRedundantList.append(name2)
    nonRedundantList = obtainComplement(lenDicQ, isRedundantList)
    print nonRedundantList
    IORobot.putListToFileO(folderName, fileQ, outputFileName, nonRedundantList)
    # NOTE(review): hard-coded copy SC_n_tmp.fasta -> SC_n.fasta ignores
    # outputFileName; confirm callers depend on these fixed names.
    os.system("cp " + folderName + "SC_n_tmp.fasta " + folderName + "SC_n.fasta")
def removeRedundantRefvsQuery(folderName, mummerLink, fileR, fileQ, outputFileName): thres = 10 if True: alignerRobot.useMummerAlignBatch( mummerLink, folderName, [["redundantRvsQ", fileR, fileQ, ""]], houseKeeper.globalParallel ) dataList = alignerRobot.extractMumData(folderName, "redundantRvsQOut") lenDicR = IORobot.obtainLength(folderName, fileR) lenDicQ = IORobot.obtainLength(folderName, fileQ) isRedundantList = [] for eachitem in dataList: match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8] l1, l2 = lenDicR[name1], lenDicQ[name2] if abs(l2 - match2) < thres: isRedundantList.append(name2) # print lenDicQ nonRedundantList = obtainComplement(lenDicQ, isRedundantList) print nonRedundantList IORobot.putListToFileO(folderName, fileQ, outputFileName, nonRedundantList) os.system("cp " + folderName + "SC_n_tmp.fasta " + folderName + "SC_n.fasta")
def removeEmbedded(folderName , mummerLink):
    """Remove contigs fully contained in other contigs.

    Self-aligns contigs.fasta with MUMmer (reusing a previous selfOut result
    when present), asks alignerRobot.extractMumDataAndRemove for the names of
    the contained contigs, and writes the survivors to noEmbed.fasta.
    """
    print "removeEmbedded"
    thres = 10  # containment tolerance in bp, forwarded to extractMumDataAndRemove
    # Strip '|' from fasta headers (they confuse downstream tooling), then
    # replace contigs.fasta with the cleaned copy.
    os.system("sed -e 's/|//g' " + folderName + "contigs.fasta > " + folderName + "contigs2.fasta")
    os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta")
    # Skip the expensive self-alignment when its output already exists.
    if not os.path.isfile(folderName + "selfOut"):
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["self", "contigs.fasta", "contigs.fasta", ""]], houseKeeper.globalParallel )
    lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')
    removeList = alignerRobot.extractMumDataAndRemove(folderName,"selfOut",lenDic,thres)
    # Keep every contig name that was not flagged for removal.
    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)
    print len(nameList)
    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)
    IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)
def removeRedundantWithFile(folderName, mummerLink, inputFilename, mummerTmpName, outputFileName):
    """Remove redundant (embedded) sequences from a fasta file via self-alignment.

    A sequence is dropped when a self-alignment shows it is almost fully
    covered (within thres bp) by a longer partner.  When two sequences are
    mutually embedded, they are clustered with union-find and only one
    representative per cluster is kept.

    inputFilename  : fasta basename (without ".fasta") inside folderName
    mummerTmpName  : prefix for the temporary MUMmer output files
    outputFileName : destination for the surviving sequences
    """
    thres = 10  # max uncovered bp for a sequence to count as embedded
    # Sanitize headers: remove '|' characters, then overwrite the input file.
    os.system("sed -e 's/|//g' " + folderName + inputFilename + ".fasta > " + folderName + inputFilename + "2.fasta")
    os.system("cp " + folderName + inputFilename + "2.fasta " + folderName + inputFilename + ".fasta")
    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [[ mummerTmpName, inputFilename + ".fasta", inputFilename + ".fasta", "" ]], houseKeeper.globalParallel)
    dataList = alignerRobot.extractMumData(folderName, mummerTmpName + "Out")
    dataList = alignerRobot.transformCoor(dataList)
    lenDic = IORobot.obtainLength(folderName, inputFilename + '.fasta')
    removeList = []
    # One union-find node per sequence; mutually-embedded pairs get merged.
    shortEmbedClusterDic = {}
    for eachitem in lenDic:
        shortEmbedClusterDic[eachitem] = clusterElem(eachitem)
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]
            # Note the strict < / > comparisons: a gap exactly equal to thres
            # falls through all three branches on purpose.
            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem
                union(shortEmbedClusterDic[name1], shortEmbedClusterDic[name2])
    nameList = obtainComplement(lenDic, removeList)
    # Keep only the union-find representative of each mutual-embedding cluster.
    returnList = []
    for eachitem in nameList:
        if find(shortEmbedClusterDic[eachitem]).id == eachitem:
            returnList.append(eachitem)
    print "len(nameList), len(returnList)", len(nameList), len(returnList)
    IORobot.putListToFileO(folderName, inputFilename + ".fasta", outputFileName, returnList)
def removeRedundantWithFile(folderName, mummerLink, inputFilename, mummerTmpName, outputFileName): thres = 10 os.system("sed -e 's/|//g' " + folderName + inputFilename + ".fasta > " + folderName + inputFilename + "2.fasta") os.system("cp " + folderName + inputFilename + "2.fasta " + folderName + inputFilename + ".fasta") if True: alignerRobot.useMummerAlignBatch( mummerLink, folderName, [[mummerTmpName, inputFilename + ".fasta", inputFilename + ".fasta", ""]], houseKeeper.globalParallel, ) # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta") # outputName, referenceName, queryName, specialName dataList = alignerRobot.extractMumData(folderName, mummerTmpName + "Out") dataList = alignerRobot.transformCoor(dataList) lenDic = IORobot.obtainLength(folderName, inputFilename + ".fasta") removeList = [] shortEmbedClusterDic = {} for eachitem in lenDic: shortEmbedClusterDic[eachitem] = clusterElem(eachitem) for eachitem in dataList: match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8] if name1 != name2: l1, l2 = lenDic[name1], lenDic[name2] if abs(l1 - match1) < thres and abs(l2 - match2) > thres: removeList.append(name1) elif abs(l1 - match1) > thres and abs(l2 - match2) < thres: removeList.append(name2) elif abs(l1 - match1) < thres and abs(l2 - match2) < thres: print "Both shortembedd", eachitem union(shortEmbedClusterDic[name1], shortEmbedClusterDic[name2]) nameList = obtainComplement(lenDic, removeList) returnList = [] for eachitem in nameList: if find(shortEmbedClusterDic[eachitem]).id == eachitem: returnList.append(eachitem) print "len(nameList), len(returnList)", len(nameList), len(returnList) IORobot.putListToFileO(folderName, inputFilename + ".fasta", outputFileName, returnList)
def removeEmbedded(folderName, mummerLink):
    """Remove contigs fully contained in other contigs (verbose variant).

    Self-aligns contigs.fasta with MUMmer, removes every contig whose
    alignment covers it almost completely (within thres bp) while the partner
    is not itself covered, and writes the survivors to noEmbed.fasta.
    """
    print "removeEmbedded"
    thres = 10  # max uncovered bp for a contig to count as embedded
    # Strip '|' from fasta headers, then overwrite contigs.fasta with the copy.
    os.system("sed -e 's/|//g' " + folderName + "contigs.fasta > " + folderName + "contigs2.fasta")
    os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta")
    if True:
        print "removeEmbedded: Aligning contigs.fasta to contigs.fasta, outputs are self*.delta"
        alignerRobot.useMummerAlignBatch(
            mummerLink,
            folderName,
            [["self", "contigs.fasta", "contigs.fasta", ""]],
            houseKeeper.globalParallel)
    print "removeEmbedded: Extracting MUMmer data from delta files to selfOut"
    dataList = alignerRobot.extractMumData(folderName, "selfOut")
    dataList = alignerRobot.transformCoor(dataList)
    lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')
    removeList = []
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]
            # Strict < / > comparisons: a gap of exactly thres matches nothing.
            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                # Mutually embedded pair: neither is removed here.
                print "Both shortembedd", eachitem
    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)
    print len(nameList)
    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)
    print "removeEmbedded: Outputting non-contained contigs to noEmbed.fasta"
    IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)
def removeEmbedded(folderName, mummerLink):
    """Remove embedded contigs after normalizing all fasta headers to ">SegN".

    Rewrites raw_reads.fasta and contigs.fasta headers with perl, self-aligns
    the renamed contig file with MUMmer, removes every contig almost fully
    covered (within thres bp) by another, and writes survivors to noEmbed.fasta.
    """
    print "removeEmbedded"
    thres = 10  # max uncovered bp for a contig to count as embedded
    # Replace every fasta header with a sequentially numbered ">SegN" header.
    command = r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "raw_reads.fasta > " + folderName + houseKeeper.globalReadName
    os.system(command)
    command = r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "contigs.fasta > " + folderName + houseKeeper.globalContigName
    os.system(command)
    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [[ "self", houseKeeper.globalContigName, houseKeeper.globalContigName, "" ]], houseKeeper.globalParallel)
    dataList = alignerRobot.extractMumData(folderName, "selfOut")
    dataList = alignerRobot.transformCoor(dataList)
    lenDic = IORobot.obtainLength(folderName, houseKeeper.globalContigName)
    removeList = []
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]
            # Strict < / > comparisons: a gap of exactly thres matches nothing.
            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                # Mutually embedded pair: neither is removed here.
                print "Both shortembedd", eachitem
    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)
    print len(nameList)
    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)
    IORobot.putListToFileO(folderName, houseKeeper.globalContigName, "noEmbed", nameList)
def removeEmbedded(folderName , mummerLink): print "removeEmbedded" thres = 10 command= r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "raw_reads.fasta > " + folderName + houseKeeper.globalReadName os.system(command) command= r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "contigs.fasta > " + folderName + houseKeeper.globalContigName os.system(command) if True: alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["self", houseKeeper.globalContigName, houseKeeper.globalContigName, ""]], houseKeeper.globalParallel) # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta") # outputName, referenceName, queryName, specialName dataList = alignerRobot.extractMumData(folderName, "selfOut") dataList = alignerRobot.transformCoor(dataList) lenDic = IORobot.obtainLength(folderName, houseKeeper.globalContigName) removeList = [] for eachitem in dataList: match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8] if name1 != name2: l1, l2 = lenDic[name1], lenDic[name2] if abs(l1 - match1) < thres and abs(l2 - match2) > thres: removeList.append(name1) elif abs(l1 - match1) > thres and abs(l2 - match2) < thres: removeList.append(name2) elif abs(l1 - match1) < thres and abs(l2 - match2) < thres: print "Both shortembedd", eachitem nameList = [] for eachitem in lenDic: nameList.append(eachitem) print len(nameList) for eachitem in removeList: if eachitem in nameList: nameList.remove(eachitem) print len(nameList) IORobot.putListToFileO(folderName, houseKeeper.globalContigName, "noEmbed", nameList)
def runningTestSet(self ,myFolderName, ctexpected):
    """Run FinisherSC end-to-end on one test data set and check the contig count.

    myFolderName : source folder containing the input files listed in
                   self.listOfFiles
    ctexpected   : expected number of contigs in the final improved3.fasta

    Raises AssertionError when the pipeline produces a different contig count.
    """
    print "Integration test on FinisherSC: " + myFolderName
    self.sourceFolder = myFolderName
    # Stage the inputs into a scratch folder.
    os.system("mkdir " + self.testingFolder)
    for eachitem in self.listOfFiles:
        os.system("cp "+ self.sourceFolder + eachitem + " " +self.testingFolder)
    # Run the full pipeline with 4 parallel workers.
    os.system("python finisherSC.py -par 4 "+ self.testingFolder + " "+ self.mummerPath)
    lenDic = IORobot.obtainLength(self.testingFolder, "/improved3.fasta")
    print lenDic
    assert(len(lenDic) == ctexpected)
    # Clean up the scratch folder; only reached when the assert passes, so a
    # failing run leaves its outputs around for inspection.
    os.system("rm -rf "+ self.testingFolder)
def removeEmbedded(folderName, mummerLink): print "removeEmbedded" thres = 10 os.system("sed -e 's/|//g' " + folderName + "contigs.fasta > " + folderName + "contigs2.fasta") os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta") if True: print "removeEmbedded: Aligning contigs.fasta to contigs.fasta, outputs are self*.delta" alignerRobot.useMummerAlignBatch( mummerLink, folderName, [["self", "contigs.fasta", "contigs.fasta", ""]], houseKeeper.globalParallel ) # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta") # outputName, referenceName, queryName, specialName print "removeEmbedded: Extracting MUMmer data from delta files to selfOut" dataList = alignerRobot.extractMumData(folderName, "selfOut") dataList = alignerRobot.transformCoor(dataList) lenDic = IORobot.obtainLength(folderName, "contigs.fasta") removeList = [] for eachitem in dataList: match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8] if name1 != name2: l1, l2 = lenDic[name1], lenDic[name2] if abs(l1 - match1) < thres and abs(l2 - match2) > thres: removeList.append(name1) elif abs(l1 - match1) > thres and abs(l2 - match2) < thres: removeList.append(name2) elif abs(l1 - match1) < thres and abs(l2 - match2) < thres: print "Both shortembedd", eachitem nameList = [] for eachitem in lenDic: nameList.append(eachitem) print len(nameList) for eachitem in removeList: if eachitem in nameList: nameList.remove(eachitem) print len(nameList) print "removeEmbedded: Outputting non-contained contigs to noEmbed.fasta" IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)
def observeOverlap(folderName):
    """Find significant partial overlaps between contigs and break contigs there.

    Scans the existing selfOut alignment data for long (> matchThres bp)
    alignments between different contigs that leave a substantial unaligned
    margin (> nonMatchThres bp) on at least one side, derives break points
    from them, and writes the broken-up contigs to breakChains.fasta.
    """
    dataList = alignerRobot.extractMumData(folderName, "selfOut")
    dataList = alignerRobot.transformCoor(dataList)
    lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')
    matchThres = 10000   # minimum alignment length to consider (bp)
    nonMatchThres = 500  # minimum unaligned margin to count as "partial" (bp)
    count = 0
    newDataList = []
    for eachitem in dataList:
        name1, name2 = eachitem[-2], eachitem[-1]
        matchLen1, matchLen2 = eachitem[4], eachitem[5]
        start1, end1, start2, end2 = eachitem[0], eachitem[1], eachitem[2], eachitem[3]
        # Earlier (stricter) variant required BOTH margins to be large:
        # if name1!= name2 and min(lenDic[name1] - end1, lenDic[name2] - end2 ) > nonMatchThres \
        # and min(start1, start2) > nonMatchThres \
        # Current rule: different contigs, a large unaligned margin at either
        # the tail OR the head of both sequences, and a long enough alignment.
        if name1!= name2 and ( min(lenDic[name1] - end1, lenDic[name2] - end2 ) > nonMatchThres \
            or min(start1, start2) > nonMatchThres ) \
            and matchLen1> matchThres:
            print "eachitem ", eachitem, lenDic[name1], lenDic[name2]
            count = count + 1
            newDataList.append(eachitem)
    print "Count: " + str(count)
    blkDic = getBreakPointFromDataList(folderName, newDataList)
    LCList = IORobot.loadContigsFromFile(folderName, "contigs.fasta")
    # Emit each contig either intact or split at its computed break points.
    contigList = []
    for eachcontig in LCList:
        if not eachcontig in blkDic:
            contigList = contigList + [LCList[eachcontig]]
        else:
            contigList = contigList + tmpBreakAcBkPts(LCList[eachcontig], blkDic[eachcontig])
    print "len(contigList)", len(contigList)
    IORobot.writeSegOut(contigList, folderName, "breakChains.fasta")
def getBreakPointFromDataList(folderName, dataList): g = 1000 blkDic = {} dataList.sort(key=itemgetter(-2)) lenDic = IORobot.obtainLength(folderName, "contigs.fasta") json_data = open(folderName + "modifiedOutliners.json", 'r') breakPtsDic = json.load(json_data) sep = 5000 for key, items in groupby(dataList, itemgetter(-2)): contigName = key newList = [] for eachitem in items: newList.append([eachitem[0], eachitem[1]]) newList.sort() bktmp = [0] if newList[0][0] > g: if withinBound(sep, breakPtsDic[contigName], newList[0][0]): bktmp.append(newList[0][0]) #bktmp.append(newList[0][0]) for i in range(len(newList) - 1): if newList[i + 1][0] > newList[i][1] + g: if withinBound(sep, breakPtsDic[contigName], newList[i + 1][0]): bktmp.append(newList[i + 1][0]) bktmp.append(lenDic[contigName]) blkDic[contigName] = bktmp print "contigName: " + contigName print "bktmp:", bktmp print "breakPtsDic[contigName]", breakPtsDic[contigName] return blkDic
def extractEdgeSet(folderName, mummerLink, option="nopolish"):
    """Reconstruct the string graph connecting contig ends via spanning reads.

    Input : relatedReads_Double.fasta, improved_Double.fasta (in folderName)
    Output: connectivity of each node (InList/OutList), handed to
            writeContigReadCombine.

    option : forwarded to writeContigReadCombine (default "nopolish").
    """
    # ## Perform MUMMER alignment
    print ">Extract Edge set"
    contigOnlyLengthDic = IORobot.obtainLength(folderName, "improved.fasta")
    lengthDic = IORobot.findContigLength(folderName, "improved")
    # Two graph nodes per contig -- presumably one per orientation, matching
    # the "_Double" file naming; confirm against IORobot.
    numberOfContig = len(contigOnlyLengthDic)*2
    K = 400    # window parameter forwarded to obtainLinkInfoReadContig
    thres = 5  # alignment tolerance forwarded to obtainLinkInfoReadContig
    # ## Apply MUMMER on them using cleanedReads against them
    # Only keep 25 kbp contig ends so the read-vs-contig alignment stays cheap.
    IORobot.truncateEndOfContigs(folderName, "improved_Double.fasta", "smaller_improvedContig.fasta", 25000, lengthDic)
    dataSet = []
    numberOfFiles = max(20, houseKeeper.globalParallel)
    if True:
        # Split the read file into parts so alignments can run in parallel.
        bindir = os.path.abspath(os.path.dirname(sys.argv[0]))
        command = bindir + "/fasta-splitter.pl --n-parts " + str(numberOfFiles) + " " + folderName + "relatedReads_Double.fasta"
        os.system(command)
    workerList = []
    for dummyI in range(1, numberOfFiles + 1):
        # fasta-splitter zero-pads part numbers below 10.
        indexOfMum = ""
        if dummyI < 10:
            indexOfMum = "0" + str(dummyI)
        else:
            indexOfMum = str(dummyI)
        outputName, referenceName, queryName, specialName= "outGapFillRefine"+indexOfMum , "smaller_improvedContig.fasta", "relatedReads_Double.part-" + indexOfMum + ".fasta", "fromMumRefine" + indexOfMum
        workerList.append([outputName, referenceName, queryName, specialName])
    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, workerList, houseKeeper.globalParallel ,True)
    # Merge the per-part read/contig link information.
    for dummyI in range(1, numberOfFiles + 1):
        tmpSet = IORobot.obtainLinkInfoReadContig(dummyI, mummerLink, folderName,thres, lengthDic, K)
        dataSet = dataSet + tmpSet
    # ## repeat aware: drop read links that land on unusable junctions.
    usableJunction = loadOpenList(folderName)
    dataSet, blockedSet = filterRepeatEnd(dataSet, usableJunction)
    # ## repeat aware end
    dataSet.sort()
    matchPair = formMatchPairFromReadInfo(dataSet)
    # Bug fix on repeat detection from reads alone
    matchPair = filterRepeatPair(matchPair)
    # end bug fix
    # For each (left, right) contig pair keep only the strongest supporting link.
    bestMatchPair = []
    for key, items in groupby(matchPair, itemgetter(0, 1)):
        maxvalue = -1
        maxLenPair = []
        for eachitem in items:
            if eachitem[2] > maxvalue:
                maxvalue = eachitem[2]
                maxLenPair = [eachitem[3], eachitem[4], eachitem[5]]
        bestMatchPair.append([key[0], key[1], maxvalue, maxLenPair[0], maxLenPair[1], maxLenPair[2]])
    contigList, leftConnect, rightConnect, rawReadList = formbestpair(bestMatchPair,numberOfContig)
    print "contigList", contigList
    writeContigReadCombine(blockedSet, dataSet, folderName, rawReadList, numberOfContig, contigList, leftConnect, option, rightConnect, mummerLink)
def xPhased(folderName , mummerLink):
    """Repeat-resolution pass (proxy for MB): condense the contig graph, emit improved3.fasta.

    Outline (from the original notes):
      1. Re-form the contig string graph with ALL connections from contigs only
      2. Log down the reads and associated blocked contigs
      3. Use reads to connect
      4. Transform graph by identifying 1 successor/predecessor case; Condense
      5. Read out contigs
    """
    numberOfContig, dataSet = IORobot.obtainLinkInfo(folderName, mummerLink, "improved2", "mb")
    lenDic = IORobot.obtainLength(folderName, "improved2_Double.fasta")
    confidenLenThres = 0  # minimum contig length for a trusted edge (0 = accept all)
    G = graphLib.seqGraph(numberOfContig)
    extraEdges = loadEdgeFromBlockedReads(folderName)
    for eachitem in dataSet:
        wt, myin, myout = eachitem
        # Names appear to follow "<6-char prefix><index>_<p|...>": strip the
        # prefix, then map 'p' to the even node and anything else to the odd
        # node of the contig's node pair -- confirm against obtainLinkInfo.
        myInData = myin[6:].split('_')
        myOutData = myout[6:].split('_')
        if myInData[1] == 'p':
            offsetin = 0
        else:
            offsetin = 1
        if myOutData[1] == 'p':
            offsetout = 0
        else:
            offsetout = 1
        i = int(myInData[0]) * 2 + offsetin
        j = int(myOutData[0]) * 2 + offsetout
        # Insert the edge only when some blocked-read edge confirms it with at
        # least the same overlap weight on both sides.
        ck = False
        for eachedge in extraEdges:
            mystart, myend, len1, len2 = eachedge[0], eachedge[1], eachedge[2] , eachedge[3]
            if [i, j] == [mystart, myend] and min(len1, len2) >= wt and lenDic[myin] >= confidenLenThres and lenDic[myout] >= confidenLenThres:
                ck = True
        if ck:
            G.insertEdge(i, j, wt)
    G.MBResolve()
    G.reportEdge()
    G.saveToFile(folderName, "condensedGraphMB.txt")
    graphFileName = "condensedGraphMB.txt"
    contigFile = "improved2_Double.fasta"
    outContigFile = "improved3.fasta"
    outOpenList = "openZoneMB.txt"
    IORobot.readContigOut(folderName, mummerLink, graphFileName, contigFile, outContigFile, outOpenList)
    # ## Repeat resolution [Proxy for phasing step]
    # 6. Find out the repeat region by MSA
    # 7. Find out the location of SNPs and extend across repeat
    # [short cut : use contig creator : your job here is to get data into the correct formats]
    print "xPhased"
def xPhased(folderName , mummerLink):
    """Repeat-resolution pass (verbose variant): condense the contig graph, emit improved3.fasta.

    Outline (from the original notes):
      1. Re-form the contig string graph with ALL connections from contigs only
      2. Log down the reads and associated blocked contigs
      3. Use reads to connect
      4. Transform graph by identifying 1 successor/predecessor case; Condense
      5. Read out contigs
    """
    print "xPhased: Aligning improved2.fasta against itself, outputting to mb*.delta"
    numberOfContig, dataSet = IORobot.obtainLinkInfo(folderName, mummerLink, "improved2", "mb")
    lenDic = IORobot.obtainLength(folderName, "improved2_Double.fasta")
    confidenLenThres = 0  # minimum contig length for a trusted edge (0 = accept all)
    print "xPhased: Building seqGraph"
    G = graphLib.seqGraph(numberOfContig)
    extraEdges = loadEdgeFromBlockedReads(folderName)
    for eachitem in dataSet:
        wt, myin, myout = eachitem
        # Names appear to follow "<6-char prefix><index>_<p|...>": strip the
        # prefix, then map 'p' to the even node and anything else to the odd
        # node of the contig's node pair -- confirm against obtainLinkInfo.
        myInData = myin[6:].split('_')
        myOutData = myout[6:].split('_')
        if myInData[1] == 'p':
            offsetin = 0
        else:
            offsetin = 1
        if myOutData[1] == 'p':
            offsetout = 0
        else:
            offsetout = 1
        i = int(myInData[0]) * 2 + offsetin
        j = int(myOutData[0]) * 2 + offsetout
        # Insert the edge only when some blocked-read edge confirms it with at
        # least the same overlap weight on both sides.
        ck = False
        for eachedge in extraEdges:
            mystart, myend, len1, len2 = eachedge[0], eachedge[1], eachedge[2] , eachedge[3]
            if [i, j] == [mystart, myend] and min(len1, len2) >= wt and lenDic[myin] >= confidenLenThres and lenDic[myout] >= confidenLenThres:
                ck = True
        if ck:
            G.insertEdge(i, j, wt)
    G.MBResolve()
    G.reportEdge()
    print "xPhased: Saving condensed seqGraph to condensedGraphMB.txt"
    G.saveToFile(folderName, "condensedGraphMB.txt")
    graphFileName = "condensedGraphMB.txt"
    contigFile = "improved2_Double.fasta"
    outContigFile = "improved3.fasta"
    outOpenList = "openZoneMB.txt"
    print "xPhased: Outputting improved contigs from condensed seqGraph to improved3.fasta"
    IORobot.readContigOut(folderName, mummerLink, graphFileName, contigFile, outContigFile, outOpenList)
    # ## Repeat resolution [Proxy for phasing step]
    # 6. Find out the repeat region by MSA
    # 7. Find out the location of SNPs and extend across repeat
    # [short cut : use contig creator : your job here is to get data into the correct formats]
    print "xPhased"