示例#1
0
def ppsOut2ppOut(inFile, outFile, taxonomicRanks, databaseFile):
    """
        Transforms a PPS output file into a file in the PP format.

        The output starts with a two-line comment header (the second line names the columns: '#ID', 'root'
        and one column per entry of taxonomicRanks), followed by one tab separated line per sequence name.
        The output buffer and the taxonomy are closed even if an error occurs while the file is processed.

        @param inFile: input file in the PPS format (first column: seq name, last column: ncbi taxon id)
        @param outFile: output file in the PP format
        @param taxonomicRanks: taxonomic ranks (starting from superkingdom)
        @param databaseFile: database file in the sqlite3 format
    """
    taxonomy = Taxonomy(databaseFile, taxonomicRanks)
    try:
        outBuff = csv.OutFileBuffer(outFile)
        try:
            namesList = csv.getColumnAsList(inFile,
                                            entryModifyFunction=None,
                                            colNum=0,
                                            sep='\t',
                                            comment='#')
            valCol = 1
            ncbidsList = csv.getColumnAsList(inFile,
                                             entryModifyFunction=None,
                                             colNum=valCol,
                                             sep='\t',
                                             comment='#')

            # Look for the last column: keep advancing while the next column yields as many entries as
            # there are sequence names; the last such column is taken as the taxon id column.
            # NOTE(review): presumably every call re-parses inFile - confirm against csv.getColumnAsList.
            while True:  # this is not efficient!
                valCol += 1
                tmpList = csv.getColumnAsList(inFile,
                                              entryModifyFunction=None,
                                              colNum=valCol,
                                              sep='\t',
                                              comment='#')
                if len(tmpList) != len(namesList):
                    break
                ncbidsList = tmpList

            header = '#PPS file transformed to PP format, input file: ' + str(inFile) + '\n'
            header += '\t'.join(['#ID', 'root'] + list(taxonomicRanks))
            # str() is kept so that the buffer receives the same string type as before
            outBuff.writeText(str(header + '\n'))

            for i, name in enumerate(namesList):
                taxPathDict = taxonomy.getPathToRoot(int(ncbidsList[i]))
                # the 'root' column stays empty when no path was found for the taxon id
                fields = [str(name), '' if taxPathDict is None else 'root']
                for rank in taxonomicRanks:
                    # a rank column is filled only if the path has a node at this rank
                    # and that node is not reported as a copy by isCopy()
                    if (taxPathDict is not None) and (rank in taxPathDict) and (
                            not taxPathDict[rank].isCopy()):
                        fields.append(taxPathDict[rank].name)
                    else:
                        fields.append('')
                outBuff.writeText(str('\t'.join(fields) + '\n'))
        finally:
            # release the output buffer also on failure
            outBuff.close()
    finally:
        # release the taxonomy (database) also on failure
        taxonomy.close()
示例#2
0
文件: pps.py 项目: algbioi/ppsplus
def ppsOut2ppOut(inFile, outFile, taxonomicRanks, databaseFile):
    """
        Transforms a PPS output file into a file in the PP format.

        Each line of the output contains (tab separated): the sequence name, 'root' (empty if no path to the
        root was found for the taxon id), and for each rank in taxonomicRanks the name of the node at this
        rank (empty if there is none or if the node's isCopy() is true). Both the output buffer and the
        taxonomy are closed on success as well as on failure.

        @param inFile: input file in the PPS format (first column: seq name, last column: ncbi taxon id)
        @param outFile: output file in the PP format
        @param taxonomicRanks: taxonomic ranks (starting from superkingdom)
        @param databaseFile: database file in the sqlite3 format
    """

    def readColumn(colNum):
        # reads one tab separated column of the input file, lines starting with '#' are comments
        return csv.getColumnAsList(inFile, entryModifyFunction=None, colNum=colNum, sep='\t', comment='#')

    taxonomy = Taxonomy(databaseFile, taxonomicRanks)
    try:
        outBuff = csv.OutFileBuffer(outFile)
        try:
            namesList = readColumn(0)
            valCol = 1
            ncbidsList = readColumn(valCol)

            # The taxon ids are taken from the last column that has one entry per sequence name: try the
            # following columns one by one and stop at the first one with a different number of entries.
            while True:  # this is not efficient!
                valCol += 1
                tmpList = readColumn(valCol)
                if len(tmpList) == len(namesList):
                    ncbidsList = tmpList
                else:
                    break

            columnNames = ['#ID', 'root']
            columnNames.extend(taxonomicRanks)
            header = '#PPS file transformed to PP format, input file: ' + str(inFile) + '\n' + '\t'.join(columnNames)
            # str() is kept so that the buffer receives the same string type as before
            outBuff.writeText(str(header + '\n'))

            for i, name in enumerate(namesList):
                taxPathDict = taxonomy.getPathToRoot(int(ncbidsList[i]))
                if taxPathDict is None:
                    # unknown taxon id: only the name is written, all other columns stay empty
                    row = [str(name), ''] + [''] * len(taxonomicRanks)
                else:
                    row = [str(name), 'root']
                    for rank in taxonomicRanks:
                        if (rank in taxPathDict) and (not taxPathDict[rank].isCopy()):
                            row.append(taxPathDict[rank].name)
                        else:
                            row.append('')
                outBuff.writeText(str('\t'.join(row) + '\n'))
        finally:
            # close the output buffer even if an exception was raised
            outBuff.close()
    finally:
        # close the taxonomy even if an exception was raised
        taxonomy.close()