Exemplo n.º 1
0
def main():
    """Parse the command line and annotate an SVGTools tree with insert data.

    Positional arguments are the SVGTools tree file and the FASTA file of
    extracted insert sequences.  Optional arguments control the scaling
    factors, a leaf-renaming file, an always-label file and the minimum
    insert size to label.  The parsed values are handed to processSVG().
    """
    parser = argparse.ArgumentParser(description = "Annotate SVGTools tree")
    parser.add_argument('SVGFile', metavar = 'SVGTools file', type=str,
        help = "The input SVGTools tree")
    parser.add_argument('insertFile', metavar = 'Insert file', type=str,
        help = "The FASTA file of extracted insert sequences")
    parser.add_argument('--sizeScaling', nargs='?', const=1, type=float,
        help = "Global size scaling factor ", default=0.1)
    parser.add_argument('--circleScaling', nargs='?', const=1, type=float,
        help = "Circle scaling factor ", default=0.05)
    parser.add_argument('--fontScaling', nargs='?', const=1, type=float,
        help = "Extra font scaling factor ", default=1)
    parser.add_argument('--renameFile', type=str,
        help = "File for renaming leaves")
    parser.add_argument('--alwaysLabel', type=str,
        help = "Always label leaves with this accession")
    parser.add_argument('--labelThresh', default=1, type=int,
        help = "Minimum size of insert to label")

    args = parser.parse_args()
    seqs = fasta.getProteinSeqs(args.insertFile)

    # Defaults used when the corresponding optional file is not supplied.
    # orgs must exist even without --renameFile, because it is always passed
    # to processSVG() below.
    rename      = {}
    orgs        = {}
    alwaysLabel = {}
    if args.renameFile:
        (rename, orgs) = loadRenameFile(args.renameFile)
    if args.alwaysLabel:
        alwaysLabel = loadAlwaysLabelFile(args.alwaysLabel)

    # NOTE(review): the second value returned by loadRenameFile (orgs) is
    # what gets passed as rename=; the first (rename) is unused here --
    # confirm this is intended.
    processSVG(args.SVGFile, seqs, args.sizeScaling, args.circleScaling,
               args.fontScaling, rename=orgs, alwaysLabel=alwaysLabel,
               labelThresh=args.labelThresh)
Exemplo n.º 2
0
def extractInserts(outputSeqs, curOrg, genomeFile):
    '''
    Stores the insert region of every FliC protein of an organism.

    outputSeqs is a mapping updated in place: protein ID -> insert sequence
    curOrg is of type organismClass; its families["FliC"] entries are used
    genomeFile is the input FASTA genome file

    Proteins are visited in descending order of their annotation objects.
    '''
    genomeSeqs = fasta.getProteinSeqs(genomeFile)

    flagellins = sorted(curOrg.families["FliC"].items(),
                        key=operator.itemgetter(1))
    flagellins.reverse()

    for proteinID, annotation in flagellins:
        sequence = genomeSeqs[proteinID]
        # presumably insert_start/insert_end are 1-based coordinates, hence
        # the -1 shift to Python indices -- verify against the annotator
        first = annotation.notes['insert_start'] - 1
        last = annotation.notes['insert_end'] - 1
        outputSeqs[proteinID] = sequence[first:last]
Exemplo n.º 3
0
def main():
    """Print selected sequences from a FASTA file.

    The first positional argument is the FASTA file; every following
    argument is a sequence ID.  Each requested sequence is printed as its
    own single-record FASTA via fasta.printFASTA().
    """
    parser = OptionParser("usage: %prog <FASTA file> <list of seq IDs>")
    #parser.add_option("-s", "--svg", dest="svg", help="output an SVGTools file of this name")

    (options, args) = parser.parse_args()
    if len(args) < 2:
        parser.error("Incorrect number of arguments!")

    fastaPath = args[0]
    allSeqs = fasta.getProteinSeqs(fastaPath)

    # Everything after the FASTA path is a sequence ID to look up and print.
    for seqID in args[1:]:
        fasta.printFASTA({seqID: allSeqs[seqID]})
def main():
    """Print per-class insert statistics as tab-separated lines.

    Takes three positional arguments: the class file, the FASTA file of
    extracted insert sequences and the leaf-renaming file.  For every class
    returned by getClassLengths() one line is printed:
    class name, number of entries, mean of the entries.
    """
    parser = argparse.ArgumentParser(description = "Get class stats")
    parser.add_argument('classFile', metavar = 'Class file', type=str,
        help = "The file listing different classes")
    parser.add_argument('insertFile', metavar = 'Insert file', type=str,
        help = "The FASTA file of extracted insert sequences")
    parser.add_argument('renameFile', type=str,
        help = "File for renaming leaves")

    args      = parser.parse_args()
    seqs      = fasta.getProteinSeqs(args.insertFile)
    classes   = getClasses(args.classFile)
    rename    = loadRenameFile(args.renameFile)
    classLens = getClassLengths(classes, seqs, rename)

    for the_class in classLens:
        lengths = classLens[the_class]
        # Single parenthesised argument: prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print(the_class + "\t" + str(len(lengths)) + "\t" + str(numpy.mean(lengths)))
Exemplo n.º 5
0
def extractConservedEnds(outputSeqs, curOrg, genomeFile):
    '''
    Stores, for every FliC protein of an organism, the sequence before the
    insert joined to the sequence after it (i.e. with the insert excised).

    outputSeqs is a mapping updated in place: protein ID -> joined sequence
    curOrg is of type organismClass; its families["FliC"] entries are used
    genomeFile is the input FASTA genome file

    Proteins are visited in descending order of their annotation objects.
    '''
    genomeSeqs = fasta.getProteinSeqs(genomeFile)

    flagellins = sorted(curOrg.families["FliC"].items(),
                        key=operator.itemgetter(1))
    flagellins.reverse()

    for proteinID, annotation in flagellins:
        sequence = genomeSeqs[proteinID]
        leadingEnd = sequence[0:(annotation.notes['insert_start'] - 1)]
        # NOTE(review): the -1 stop bound leaves out the final character of
        # the sequence -- confirm this is intended.
        trailingEnd = sequence[(annotation.notes['insert_end'] - 1):-1]
        outputSeqs[proteinID] = leadingEnd + trailingEnd
Exemplo n.º 6
0
def main():
    """Plot the annotated domain structure of every protein in a FASTA file.

    For each protein (in file order) a horizontal line and a text label are
    drawn.  FliCn hits are drawn with colourClass(r=255), FliCc hits with
    colourClass(b=255), and x-domain hits whose iEvalue is below 0.001 with
    colourClass(g=255).  When --extractSlivers is given, stretches not
    covered by any drawn domain that are at least that long are numbered on
    the plot and collected.

    Outputs (always written, possibly empty):
      * slivers.xy  -- sliver number, tab, "x,y" label coordinates
      * slivers.faa -- FASTA of the collected sliver sequences
      * the SVG file named on the command line

    The HMM directory comes from --hmmdir, falling back to the HMM_DIR
    environment variable; the program exits if neither is available.
    --outputOthers is accepted but not used by this function.
    """
    parser = argparse.ArgumentParser(
        description=
        "Plots an SVG of domain structures HMMs specified in a directory")
    parser.add_argument('fastaFile', metavar = 'FASTA file', type=str,
        help = "The FASTA file to plot")
    parser.add_argument('SVGout', metavar = 'SVG output filename', type=str,
        help = "The SVGTools output filename")
    parser.add_argument('xdomain',
        help = "Filename of x-domain HMM (not directory -- this is HMM_DIR)")
    parser.add_argument('--hmmdir',
        help = "Location of HMMs (overrides HMM_DIR environment variable)")
    parser.add_argument(
        '--extractSlivers',
        help="label, and extract to FAA, sliver sections > X aa")
    parser.add_argument(
        '--outputOthers',
        help="Also output stretches of sequence between annotated domains")
    args = parser.parse_args()

    # ----------
    # Init

    yInc = 40
    xScale = 0.1
    yScale = 0.5
    geneHeight = yInc * 0.4
    sliverCount = 0
    xMargin = 500
    sliverProteins = {}
    coords = {}

    #-----------
    # A non-empty --hmmdir wins; otherwise fall back to the environment.
    HMMDir = args.hmmdir or os.environ.get("HMM_DIR")
    if HMMDir is None:
        sys.exit(
            "ERROR: HMM_DIR not specified on command line or environment var"
        )

    # Convert the threshold once, up front, so a non-numeric value fails
    # before any annotation work is done.
    minSliverLen = None
    if args.extractSlivers:
        minSliverLen = int(args.extractSlivers)

    proteins = fasta.getProteinSeqs(args.fastaFile)
    proteinList = fasta.getSequenceOrder(args.fastaFile)

    annotations = annotateGenes.organismClass(args.fastaFile,
                                              HMMDir,
                                              annotateAll=False)

    annotations.getStraightforwardsProtein("X", [args.xdomain], HMMDir=HMMDir)
    annotations.annotateFlagellins()

    # -----------------
    SVGFile = SVGTools.SVGClass(imageHeight=1500)

    # Note key -> colour arguments, in drawing order.
    flagellinDomains = (("FliCn", {"r": 255}), ("FliCc", {"b": 255}))

    y = yInc

    for protein in proteinList:
        # Start with one sliver spanning the whole protein; every drawn
        # domain is passed to recalcSlivers() to update the list.
        slivers = [[1, len(proteins[protein])]]
        Y = (y) * yScale
        SVGFile.addItem(
            SVGTools.lineClass(x1=xMargin * xScale,
                               y1=Y,
                               x2=(xMargin + len(proteins[protein])) * xScale,
                               y2=Y))
        SVGFile.addItem(SVGTools.textClass(protein,
                                           x=0,
                                           y=(geneHeight + y) * yScale),
                        layer=1)
        # y already points at the next row from here on; the sliver labels
        # below subtract yInc again to land on the current row.
        y += yInc

        if protein in annotations.families["FliC"]:
            annotation = annotations.families["FliC"][protein]
            for noteKey, colourArgs in flagellinDomains:
                for hit in annotation.notes[noteKey]:
                    slivers = recalcSlivers(slivers, hit["from"], hit["to"])
                    x1 = (xMargin + hit["from"]) * xScale
                    x2 = (xMargin + hit["to"]) * xScale
                    SVGFile.addItem(
                        SVGTools.rectClass(
                            x=x1,
                            y=Y - (geneHeight * 0.5),
                            width=x2 - x1,
                            height=geneHeight,
                            colour=SVGTools.colourClass(**colourArgs)))

        if protein in annotations.families["X"]:
            annotation = annotations.families["X"][protein]
            green = 255
            for domainHit in annotation.notes["notes"]:
                greenColour = SVGTools.colourClass(g=green)

                x1 = (xMargin + domainHit["from"]) * xScale
                x2 = (xMargin + domainHit["to"]) * xScale

                # Only hits that carry an iEvalue below 0.001 are drawn and
                # removed from the slivers.
                if "iEvalue" in domainHit and domainHit["iEvalue"] < 0.001:
                    slivers = recalcSlivers(slivers, domainHit["from"],
                                            domainHit["to"])
                    SVGFile.addItem(
                        SVGTools.rectClass(x=x1,
                                           y=Y - (geneHeight * 0.5),
                                           width=x2 - x1,
                                           height=geneHeight,
                                           colour=greenColour))

        if minSliverLen is not None:
            for sliver in slivers:
                if sliver[1] - sliver[0] >= minSliverLen:
                    sliverID = str(sliverCount)
                    _x = (xMargin + (sliver[0] + sliver[1]) / 2) * xScale
                    _y = (geneHeight + y - yInc) * yScale
                    coords[sliverID] = [_x, _y]
                    SVGFile.addItem(SVGTools.textClass(sliverID,
                                                       fontsize=8,
                                                       x=_x,
                                                       y=_y),
                                    layer=1)

                    # NOTE(review): slivers start at 1 but are used directly
                    # as slice bounds here -- confirm the coordinate
                    # convention of recalcSlivers().
                    sliverProteins[sliverID] = \
                        proteins[protein][sliver[0]:sliver[1]]
                    sliverCount += 1

    # Context managers make sure both output files are flushed and closed.
    with open("slivers.xy", 'w') as xyFile:
        for sliverID in sliverProteins:
            labelX, labelY = coords[sliverID]
            xyFile.write(sliverID + "\t" + str(int(labelX)) + "," +
                         str(int(labelY)) + "\n")
    fasta.writeFASTA("slivers.faa", sliverProteins)
    with open(args.SVGout, 'w') as svgOut:
        svgOut.write(str(SVGFile) + "\n")