Exemplo n.º 1
0
        # Create the output directories.  Each directory is checked on its own
        # so that a partially populated output folder (annotate_misc present,
        # annotate_results missing, or the other way round) is completed
        # instead of being left without one of them or raising in os.makedirs.
        misc_dir = os.path.join(outputdir, 'annotate_misc')
        results_dir = os.path.join(outputdir, 'annotate_results')
        if os.path.isdir(misc_dir):
            # Not fatal: execution continues and existing data is reused.
            lib.log.error(
                "Output directory %s already exists, will use any existing data.  If this is not what you want, exit, and provide a unique name for output folder"
                % (outputdir))
        for needed_dir in (misc_dir, results_dir):
            if not os.path.isdir(needed_dir):
                os.makedirs(needed_dir)
        lib.log.info("Parsing input files")
        # File paths inside annotate_misc that are handed to lib.gb2output
        # together with the GenBank input.
        Scaffolds = os.path.join(misc_dir, 'genome.scaffolds.fasta')
        Proteins = os.path.join(misc_dir, 'genome.proteins.fasta')
        Transcripts = os.path.join(misc_dir, 'genome.transcripts.fasta')
        lib.gb2output(genbank, Proteins, Transcripts, Scaffolds)

# The logfiles directory is needed later on, so create it now if it is missing.
logfiles_dir = os.path.join(outputdir, 'logfiles')
if not os.path.isdir(logfiles_dir):
    os.makedirs(logfiles_dir)

# Switch the inputs to absolute paths so later steps are not affected by
# relative-path problems.  Transcripts is deliberately left out here: the
# original note says it is not used yet and might otherwise cause an error.
Proteins = os.path.abspath(Proteins)
genbank = os.path.abspath(genbank)

if 'phobius' in args.methods or 'all' in args.methods:
    #run Phobius to predict secreted proteins and membrane, default is local if installed, otherwise remote
    phobius_out = os.path.join(outputdir, 'annotate_misc',
                               'phobius.results.txt')
    phobiusLog = os.path.join(outputdir, 'logfiles', 'phobius.log')
    lib.log.info(
Exemplo n.º 2
0
        # Create the output directories.  Each directory is checked on its own
        # so that a partially populated output folder (annotate_misc present,
        # annotate_results missing, or the other way round) is completed
        # instead of being left without one of them or raising in os.makedirs.
        misc_dir = os.path.join(outputdir, 'annotate_misc')
        results_dir = os.path.join(outputdir, 'annotate_results')
        if os.path.isdir(misc_dir):
            # Not fatal: execution continues and existing data is reused.
            lib.log.error(
                "Output directory %s already exists, will use any existing data.  If this is not what you want, exit, and provide a unique name for output folder"
                % (outputdir))
        for needed_dir in (misc_dir, results_dir):
            if not os.path.isdir(needed_dir):
                os.makedirs(needed_dir)
        lib.log.info("Parsing input files")
        # File paths inside annotate_misc that are handed to lib.gb2output
        # together with the GenBank input.
        Scaffolds = os.path.join(misc_dir, 'genome.scaffolds.fasta')
        Proteins = os.path.join(misc_dir, 'genome.proteins.fasta')
        Transcripts = os.path.join(misc_dir, 'genome.transcripts.fasta')
        lib.gb2output(genbank, Proteins, Transcripts, Scaffolds)

# Rebind the input paths to their absolute form so later steps are not
# affected by relative-path problems.  Transcripts is intentionally excluded
# (the original note says it is not used yet and might cause an error).
# The one-step rebinding was suggested via GitHub.
Scaffolds, Proteins, GFF = map(os.path.abspath, (Scaffolds, Proteins, GFF))

#get organism and isolate from GBK file
if not args.species:
    if genbank != '':
        with open(genbank, 'rU') as gbk:
            SeqRecords = SeqIO.parse(gbk, 'genbank')
            for record in SeqRecords:
                for f in record.features:
                    if f.type == "source":
                        organism = f.qualifiers.get("organism", ["???"])[0]
        lib.log.error("Properly formatted 'funannotate predict' files do no exist in this directory")
        os._exit(1)
    else:
        # Identical input and output folders are taken to mean that the user
        # supplied the predict folder itself (an assumption carried over from
        # the original author), so the output goes to what
        # lib.get_parent_dir returns for it.
        if inputdir == outputdir:
            outputdir = lib.get_parent_dir(inputdir)
        # Create the output directories.  Each directory is checked on its own
        # so that a partially populated output folder (annotate_misc present,
        # annotate_results missing, or the other way round) is completed
        # instead of being left without one of them or raising in os.makedirs.
        misc_dir = os.path.join(outputdir, 'annotate_misc')
        results_dir = os.path.join(outputdir, 'annotate_results')
        if os.path.isdir(misc_dir):
            # Not fatal: execution continues and existing data is reused.
            lib.log.error("Output directory %s already exists, will use any existing data.  If this is not what you want, exit, and provide a unique name for output folder" % (outputdir))
        for needed_dir in (misc_dir, results_dir):
            if not os.path.isdir(needed_dir):
                os.makedirs(needed_dir)
        lib.log.info("Parsing input files")
        # File paths inside annotate_misc that are handed to lib.gb2output
        # together with the GenBank input.
        Scaffolds = os.path.join(misc_dir, 'genome.scaffolds.fasta')
        Proteins = os.path.join(misc_dir, 'genome.proteins.fasta')
        Transcripts = os.path.join(misc_dir, 'genome.transcripts.fasta')
        lib.gb2output(genbank, Proteins, Transcripts, Scaffolds)

# Get absolute paths for all inputs so there are no problems later.
# The names must be rebound explicitly: assigning to a loop variable
# (`for i in ...: i = os.path.abspath(i)`) only changes `i` and leaves
# Scaffolds, Proteins, Transcripts and GFF untouched.
Scaffolds, Proteins, Transcripts, GFF = [
    os.path.abspath(path) for path in (Scaffolds, Proteins, Transcripts, GFF)
]
        

#get organism and isolate from GBK file
if not args.species:
    with open(genbank, 'rU') as gbk:
        SeqRecords = SeqIO.parse(gbk, 'genbank')
        for record in SeqRecords:
            for f in record.features:
                if f.type == "source":
                    organism = f.qualifiers.get("organism", ["???"])[0]
                    if not args.isolate: