Exemplo n.º 1
0
def removeRedundantRefvsQuery(folderName, mummerLink, fileR, fileQ,
                              outputFileName):

    thres = 10

    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName,
                                         [["redundantRvsQ", fileR, fileQ, ""]],
                                         houseKeeper.globalParallel)

    dataList = alignerRobot.extractMumData(folderName, "redundantRvsQOut")
    lenDicR = IORobot.obtainLength(folderName, fileR)
    lenDicQ = IORobot.obtainLength(folderName, fileQ)

    isRedundantList = []

    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[
            7], eachitem[8]
        l1, l2 = lenDicR[name1], lenDicQ[name2]

        if abs(l2 - match2) < thres:
            isRedundantList.append(name2)

    #print lenDicQ

    nonRedundantList = obtainComplement(lenDicQ, isRedundantList)

    print nonRedundantList
    IORobot.putListToFileO(folderName, fileQ, outputFileName, nonRedundantList)

    os.system("cp " + folderName + "SC_n_tmp.fasta " + folderName +
              "SC_n.fasta")
def removeRedundantRefvsQuery(folderName, mummerLink, fileR, fileQ, outputFileName):

    thres = 10

    if True:
        alignerRobot.useMummerAlignBatch(
            mummerLink, folderName, [["redundantRvsQ", fileR, fileQ, ""]], houseKeeper.globalParallel
        )

    dataList = alignerRobot.extractMumData(folderName, "redundantRvsQOut")
    lenDicR = IORobot.obtainLength(folderName, fileR)
    lenDicQ = IORobot.obtainLength(folderName, fileQ)

    isRedundantList = []

    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
        l1, l2 = lenDicR[name1], lenDicQ[name2]

        if abs(l2 - match2) < thres:
            isRedundantList.append(name2)

    # print lenDicQ

    nonRedundantList = obtainComplement(lenDicQ, isRedundantList)

    print nonRedundantList
    IORobot.putListToFileO(folderName, fileQ, outputFileName, nonRedundantList)

    os.system("cp " + folderName + "SC_n_tmp.fasta " + folderName + "SC_n.fasta")
Exemplo n.º 3
0
def removeEmbedded(folderName , mummerLink):
    print "removeEmbedded"
    thres = 10
    os.system("sed -e 's/|//g' " + folderName + "contigs.fasta  > " + folderName + "contigs2.fasta")

    os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta") 

    if not os.path.isfile(folderName + "selfOut"):
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["self", "contigs.fasta", "contigs.fasta", ""]], houseKeeper.globalParallel )
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName
    
    lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')
    removeList = alignerRobot.extractMumDataAndRemove(folderName,"selfOut",lenDic,thres)
        
    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)

    print len(nameList)
    
    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)
    
    IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)
Exemplo n.º 4
0
def removeRedundantWithFile(folderName, mummerLink, inputFilename,
                            mummerTmpName, outputFileName):
    thres = 10
    os.system("sed -e 's/|//g' " + folderName + inputFilename + ".fasta  > " +
              folderName + inputFilename + "2.fasta")

    os.system("cp " + folderName + inputFilename + "2.fasta " + folderName +
              inputFilename + ".fasta")

    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [[
            mummerTmpName, inputFilename + ".fasta", inputFilename + ".fasta",
            ""
        ]], houseKeeper.globalParallel)
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName

    dataList = alignerRobot.extractMumData(folderName, mummerTmpName + "Out")

    dataList = alignerRobot.transformCoor(dataList)

    lenDic = IORobot.obtainLength(folderName, inputFilename + '.fasta')

    removeList = []

    shortEmbedClusterDic = {}

    for eachitem in lenDic:
        shortEmbedClusterDic[eachitem] = clusterElem(eachitem)

    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[
            7], eachitem[8]

        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]

            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem
                union(shortEmbedClusterDic[name1], shortEmbedClusterDic[name2])

    nameList = obtainComplement(lenDic, removeList)

    returnList = []

    for eachitem in nameList:
        if find(shortEmbedClusterDic[eachitem]).id == eachitem:
            returnList.append(eachitem)

    print "len(nameList), len(returnList)", len(nameList), len(returnList)

    IORobot.putListToFileO(folderName, inputFilename + ".fasta",
                           outputFileName, returnList)
def removeRedundantWithFile(folderName, mummerLink, inputFilename, mummerTmpName, outputFileName):
    thres = 10
    os.system("sed -e 's/|//g' " + folderName + inputFilename + ".fasta  > " + folderName + inputFilename + "2.fasta")

    os.system("cp " + folderName + inputFilename + "2.fasta " + folderName + inputFilename + ".fasta")

    if True:
        alignerRobot.useMummerAlignBatch(
            mummerLink,
            folderName,
            [[mummerTmpName, inputFilename + ".fasta", inputFilename + ".fasta", ""]],
            houseKeeper.globalParallel,
        )
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName

    dataList = alignerRobot.extractMumData(folderName, mummerTmpName + "Out")

    dataList = alignerRobot.transformCoor(dataList)

    lenDic = IORobot.obtainLength(folderName, inputFilename + ".fasta")

    removeList = []

    shortEmbedClusterDic = {}

    for eachitem in lenDic:
        shortEmbedClusterDic[eachitem] = clusterElem(eachitem)

    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]

        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]

            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem
                union(shortEmbedClusterDic[name1], shortEmbedClusterDic[name2])

    nameList = obtainComplement(lenDic, removeList)

    returnList = []

    for eachitem in nameList:
        if find(shortEmbedClusterDic[eachitem]).id == eachitem:
            returnList.append(eachitem)

    print "len(nameList), len(returnList)", len(nameList), len(returnList)

    IORobot.putListToFileO(folderName, inputFilename + ".fasta", outputFileName, returnList)
def removeEmbedded(folderName, mummerLink):
    print "removeEmbedded"
    thres = 10
    os.system("sed -e 's/|//g' " + folderName + "contigs.fasta  > " +
              folderName + "contigs2.fasta")

    os.system("cp " + folderName + "contigs2.fasta " + folderName +
              "contigs.fasta")

    if True:
        print "removeEmbedded: Aligning contigs.fasta to contigs.fasta, outputs are self*.delta"
        alignerRobot.useMummerAlignBatch(
            mummerLink, folderName,
            [["self", "contigs.fasta", "contigs.fasta", ""]],
            houseKeeper.globalParallel)
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName

    print "removeEmbedded: Extracting MUMmer data from delta files to selfOut"
    dataList = alignerRobot.extractMumData(folderName, "selfOut")

    dataList = alignerRobot.transformCoor(dataList)

    lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')

    removeList = []
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[
            7], eachitem[8]

        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]

            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem

    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)

    print len(nameList)

    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)

    print "removeEmbedded: Outputting non-contained contigs to noEmbed.fasta"
    IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)
Exemplo n.º 7
0
def removeEmbedded(folderName, mummerLink):
    print "removeEmbedded"

    thres = 10

    command = r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "raw_reads.fasta > " + folderName + houseKeeper.globalReadName
    os.system(command)

    command = r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "contigs.fasta > " + folderName + houseKeeper.globalContigName
    os.system(command)

    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [[
            "self", houseKeeper.globalContigName, houseKeeper.globalContigName,
            ""
        ]], houseKeeper.globalParallel)
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName

    dataList = alignerRobot.extractMumData(folderName, "selfOut")

    dataList = alignerRobot.transformCoor(dataList)

    lenDic = IORobot.obtainLength(folderName, houseKeeper.globalContigName)

    removeList = []
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[
            7], eachitem[8]

        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]

            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem

    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)

    print len(nameList)

    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)

    IORobot.putListToFileO(folderName, houseKeeper.globalContigName, "noEmbed",
                           nameList)
def removeEmbedded(folderName , mummerLink):
    print "removeEmbedded"
    
    thres = 10
    
    command= r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "raw_reads.fasta > " + folderName  + houseKeeper.globalReadName
    os.system(command)

    command= r'''perl -pe 's/>[^\$]*$/">Seg" . ++$n ."\n"/ge' ''' + folderName + "contigs.fasta > " + folderName  + houseKeeper.globalContigName
    os.system(command)


    if True:
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["self", houseKeeper.globalContigName, houseKeeper.globalContigName, ""]], houseKeeper.globalParallel)
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName
    
    dataList = alignerRobot.extractMumData(folderName, "selfOut")
    
    dataList = alignerRobot.transformCoor(dataList)
    
    lenDic = IORobot.obtainLength(folderName, houseKeeper.globalContigName)
    
    removeList = []
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
        
        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]
            
            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem
                
    
    
    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)

    print len(nameList)
    
    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)
    
    IORobot.putListToFileO(folderName, houseKeeper.globalContigName, "noEmbed", nameList)
    
def removeEmbedded(folderName, mummerLink):
    print "removeEmbedded"
    thres = 10
    os.system("sed -e 's/|//g' " + folderName + "contigs.fasta  > " + folderName + "contigs2.fasta")

    os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta")

    if True:
        print "removeEmbedded: Aligning contigs.fasta to contigs.fasta, outputs are self*.delta"
        alignerRobot.useMummerAlignBatch(
            mummerLink, folderName, [["self", "contigs.fasta", "contigs.fasta", ""]], houseKeeper.globalParallel
        )
        # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
        # outputName, referenceName, queryName, specialName

    print "removeEmbedded: Extracting MUMmer data from delta files to selfOut"
    dataList = alignerRobot.extractMumData(folderName, "selfOut")

    dataList = alignerRobot.transformCoor(dataList)

    lenDic = IORobot.obtainLength(folderName, "contigs.fasta")

    removeList = []
    for eachitem in dataList:
        match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]

        if name1 != name2:
            l1, l2 = lenDic[name1], lenDic[name2]

            if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
                removeList.append(name1)
            elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
                removeList.append(name2)
            elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
                print "Both shortembedd", eachitem

    nameList = []
    for eachitem in lenDic:
        nameList.append(eachitem)

    print len(nameList)

    for eachitem in removeList:
        if eachitem in nameList:
            nameList.remove(eachitem)
    print len(nameList)

    print "removeEmbedded: Outputting non-contained contigs to noEmbed.fasta"
    IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)