Пример #1
0
    def createSbatchFile(self):
        """Write a SLURM sbatch script for this analysis.

        The script path comes from FileUtils.getDatestampedFilename (using the
        analysis output directory) and is stored in ``self.batchfile``.  The
        script text is stored in ``self.batchfiletext`` and written to that
        path with FileUtils.writeTextToFile.

        The generated script looks like:

            #!/usr/bin/env bash
            #SBATCH -J <job name>
            #SBATCH -o <full path to stdout file>
            #SBATCH -e <full path to stderr file>
            #SBATCH -p <queue from the analysis>
            #SBATCH -n <number of cores>
            #SBATCH -t <time in minutes>
            #SBATCH --mem=<memory in Mb>
            python <POGPIPEROOT>bin/run_analysis.py -a <name> -i <first input file>
        """
        cwd = self.analysis.output_dir
        name = self.analysis.name
        jobname = "PogPipe." + name

        self.batchfile = FileUtils.getDatestampedFilename(
            cwd, jobname, ".sbatch")

        jobout = os.path.join(cwd, jobname + ".out")
        joberr = os.path.join(cwd, jobname + ".err")

        # The time limit is currently fixed rather than taken from the analysis.
        time_limit = "120"

        header = [
            "#!/usr/bin/env bash",
            # sbatch options start with '-'; the previous '=J' was not a valid
            # option, so the job name directive was wrong.
            "#SBATCH -J " + jobname,
            "#SBATCH -o " + jobout,
            "#SBATCH -e " + joberr,
            "#SBATCH -p " + self.analysis.queue,
            "#SBATCH -n %d" % (self.analysis.cores),
            "#SBATCH -t %s" % (time_limit),
            # NOTE(review): a per-core figure is passed to --mem here; confirm
            # whether --mem-per-cpu was intended.
            "#SBATCH --mem=%d" % (self.analysis.mempercore),
        ]

        # The database-backed case (analysis.id set) and the command-line case
        # (analysis.id is None) both ran exactly the same command, so there is
        # a single code path here.
        command = ("python " + settings.POGPIPEROOT +
                   "bin/run_analysis.py -a " + name + " -i " +
                   self.analysis.input_files[0].input_file)

        script = "\n".join(header) + "\n" + command

        self.batchfiletext = script

        FileUtils.writeTextToFile(self.batchfile, script)
Пример #2
0
    def checkDiskSpace(self):
        """Check free space in the working directory, then the output directory.

        For each directory the free space reported by
        FileUtils.getFreeDiskSpace is logged.  An Exception is raised (after
        logging the same message) when the free space is below
        ``self.minimum_space_needed`` (if that is set) or below 10000000.
        """
        for attr in ("working_dir", "output_dir"):
            directory = getattr(self, attr)
            free = FileUtils.getFreeDiskSpace(directory)

            logging.info(
                " ========> Analysis %20s checked disk space for %s Free space (bytes) %s Needed %s"
                % (self.name, directory, free, self.minimum_space_needed))

            below_requested = (self.minimum_space_needed
                               and free < self.minimum_space_needed)

            # Both failure conditions report the identical message.
            if below_requested or free < 10000000:
                message = "Not enough disk space needed in %s.  Needs %s, available %s" % (
                    directory, self.minimum_space_needed, free)
                logging.info(" ========> Analysis %20s %s" % (self.name, message))
                raise Exception(message)
Пример #3
0
    def checkDiskSpace(anaobj):
        """Check free space in an analysis' working and output directories.

        Prints the analysis status and working directory, then for each
        directory logs the free space reported by FileUtils.getFreeDiskSpace.
        An Exception is raised (after logging the same message) when the free
        space is below ``anaobj.minimum_space_needed`` (if that is set) or
        below 10000000.
        """
        print(anaobj.currentstatus)
        print(anaobj.working_dir)

        for attr in ("working_dir", "output_dir"):
            directory = getattr(anaobj, attr)
            free = FileUtils.getFreeDiskSpace(directory)

            logging.info(
                " ========> Analysis %20s checked disk space for %s Free space (bytes) %s Needed %s"
                % (anaobj.name, directory, free, anaobj.minimum_space_needed))

            below_requested = (anaobj.minimum_space_needed
                               and free < anaobj.minimum_space_needed)

            # Both failure conditions report the identical message.
            if below_requested or free < 10000000:
                message = "Not enough disk space needed in %s.  Needs %s, available %s" % (
                    directory, anaobj.minimum_space_needed, free)
                logging.info(" ========> Analysis %20s %s" % (anaobj.name, message))
                raise Exception(message)
Пример #4
0
    def testGetFreeGbDiskSpace(self):
        """The free space FileUtils reports for /tmp/ in Gb should be positive"""
        free_gb = FileUtils.getFreeGbDiskSpace("/tmp/")
        print(" GBytes free are %d" % free_gb)

        self.assertTrue(free_gb > 0)
Пример #5
0
    def testGetDiskUsage(self):
        """The disk usage FileUtils reports for /tmp/ should be positive"""
        usage = FileUtils.getDiskUsage("/tmp/")
        print(" Disk usage is %s " % usage)

        self.assertTrue(usage > 0)
Пример #6
0
    def testGetPercentFreeDiskSpace(self):
        """The percent free space FileUtils reports for /tmp/ should be in 0..100"""
        percent_free = FileUtils.getPercentFreeDiskSpace("/tmp/")
        print(" Percent free is %d" % percent_free)

        in_range = 0 <= percent_free <= 100
        self.assertTrue(in_range)
Пример #7
0
    def postProcessOutput(anaobj):
        """Move the expected output files from the working to the output directory.

        Steps:
          * raise if AnalysisUtils.checkExpectedOutputFiles reports False;
          * make sure the output directory exists (FileUtils.checkDirExists);
          * move every expected output file from ``anaobj.working_dir`` to the
            same relative path under ``anaobj.output_dir``;
          * reset ``anaobj.output_files`` and register each moved file with
            AnalysisUtils.addOutputFile;
          * delete every path listed in ``anaobj.temp_output_files``.

        Raises:
            Exception: when expected output files are missing.  Any error
                raised while moving files is logged and re-raised.
        """
        logging.info(" ========> Analysis %20s called postProcessOutput:"%(anaobj.name))

        if AnalysisUtils.checkExpectedOutputFiles(anaobj) == False:
            raise Exception("Missing expected output files. Number missing are [%d]"%(len(anaobj.missing_output_files)))

        FileUtils.checkDirExists(anaobj.output_dir)

        moved = []

        logging.info(" ========> Analysis %20s called postProcessOutput: Moving files from %s to %s "%(anaobj.name,anaobj.working_dir,anaobj.output_dir))
        try:
            for srcfileobj in anaobj.expected_output_files:
                relpath = srcfileobj.expected_output_file

                source = os.path.join(anaobj.working_dir, relpath)
                destination = os.path.join(anaobj.output_dir, relpath)

                FileUtils.checkDirExistsForFile(destination)

                # shutil.move signals failure by raising.  Its return value is
                # None on Python 2 but the destination path on Python 3.3+, so
                # testing the return value for None would log a false "FAILED"
                # there.  Reaching the next line means the move succeeded.
                shutil.move(source, destination)

                print("Checking %s" % destination)
                moved.append(destination)

                logging.info(" ========> Analysis %20s called postProcessOutput: Result of file move for  %s = %s" % (anaobj.name, relpath, "OK"))

        except Exception as e:
            logging.info(" ========> Analysis %20s file move failed %s"%(anaobj.name,e))
            raise

        anaobj.output_files = []
        for path in moved:
            AnalysisUtils.addOutputFile(anaobj, path)

        for f in anaobj.temp_output_files:
            logging.info(" ========> Analysis %20s removing temp file %s "%(anaobj.name,f))
            os.remove(f)
Пример #8
0
    def setUp(self):
        """Recreate the test database and open a session on it."""
        test_db = settings.TESTDBNAME

        # Drop any database file left over from a previous run.
        if FileUtils.fileExists(test_db):
            os.remove(test_db)

        settings.DBNAME = test_db

        init_database()

        self.session = sessionmaker(bind=settings.ENGINE)()
Пример #9
0
    def __init__(self, deltafile, refseqs, qryseqs):
        """Store the delta file path and the reference/query sequences.

        Raises:
            Exception: if FileUtils.fileExists reports that *deltafile* does
                not exist.
        """
        self.deltafile, self.refseqs, self.qryseqs = deltafile, refseqs, qryseqs

        if FileUtils.fileExists(self.deltafile):
            return

        message = "Can't parse Mummer delta file.  File [%s] doesn't exist" % self.deltafile
        raise Exception(message)
Пример #10
0
    def getCommands(self):
        """Build the two-step mpileup / varFilter command list.

        Resets ``self.commands`` and ``self.output_files``, checks disk space
        and input files, then creates one AnalysisCommand for the
        samtools mpileup | bcftools view pipeline (raw VCF) and one for the
        bcftools view | vcfutils varFilter pipeline (filtered VCF).  When
        ``self.regionstr`` is non-empty it is passed with ``-r`` and included
        in the output file names.  Both output paths are appended to
        ``self.expected_output_files``.

        Returns:
            The list stored in ``self.commands``.

        Raises:
            Exception: if ``self.checkInputFiles()`` returns False.
        """
        self.commands = []
        self.output_files = []

        self.checkDiskSpace()

        # NOTE(review): the returned dictionary is not used in this method; the
        # call is kept as-is in case it is relied on for its side effects.
        seqs = FastaFile.getSequenceDict(self.refgenome, False)

        if self.checkInputFiles() == False:
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files))

        self.basename = FileUtils.getFileParts(self.input_files[0])['basename']

        # Need to set dbtype somewhere

        # Common prefix of both output files; a region, if any, goes in the name.
        stem = self.working_dir + "/" + self.basename
        region_option = ""

        if self.regionstr != "":
            region_option = " -r " + self.regionstr
            stem = stem + "." + self.regionstr

        raw_vcf = stem + ".raw.vcf"
        filtered_vcf = stem + ".flt.vcf"

        self.expected_output_files.extend([raw_vcf, filtered_vcf])

        pileup_command = "".join([
            self.samtools, " mpileup -uf ", self.refgenome, " ",
            self.input_files[0], " ", region_option, " | ", self.bcftools,
            " view ", " -bvcg -  > ", raw_vcf,
        ])

        filter_command = "".join([
            self.bcftools, " view ", raw_vcf, " | ", self.vcfutils,
            " varFilter -D100 > ", filtered_vcf,
        ])

        print("Command %s" % pileup_command)
        print("Command %s" % filter_command)

        for text in (pileup_command, filter_command):
            # Rank is the 1-based position of the command in the list.
            self.commands.append(
                AnalysisCommand(command=text,
                                command_rank=len(self.commands) + 1))

        return self.commands
Пример #11
0
    def getCommands(self):
        """Build SamtoolsMpileup commands for the genome in chunk-sized regions.

        Resets ``self.commands`` and ``self.output_files``, checks disk space
        and input files, then walks every sequence returned by
        FastaFile.getSequenceDict in windows of ``self.chunk`` positions
        (regions are written as ``name:start-end`` starting at position 1).
        For each window a "SamtoolsMpileup" analysis is created through
        AnalysisFactory and its commands are appended to ``self.commands``.

        Returns:
            The list stored in ``self.commands``.

        Raises:
            Exception: if ``self.checkInputFiles()`` returns False, or if
                ``self.chunk`` is smaller than 1.
        """
        self.commands = []
        self.output_files = []

        self.checkDiskSpace()

        print("Reading genome file")
        seqs = FastaFile.getSequenceDict(self.refgenome, False)

        if self.checkInputFiles() == False:
            raise Exception("Input files [%s] don't exist = can't continue"%(self.input_files))

        fileparts = FileUtils.getFileParts(self.input_files[0])

        self.basename = fileparts['basename']

        # A chunk size below 1 would never advance the window start below.
        if self.chunk < 1:
            raise Exception("Chunk size [%s] must be at least 1 = can't continue" % (self.chunk))

        for seq in seqs:

            # presumably the sequence length — taken from the 'len' entry
            seqlen = seqs[seq]['len']

            start = 1

            # '<=' rather than '<': with a strict comparison the final position
            # is dropped whenever a window would start exactly on it
            # (length 1, or length == k * chunk + 1).
            while start <= seqlen:
                end = min(start + self.chunk - 1, seqlen)

                regionstr = "%s:%d-%d" % (seq, start, end)

                tmpana = AnalysisFactory.createAnalysisFromModuleName("SamtoolsMpileup")

                tmpana.setInputFiles(self.input_files, self.input_types)

                tmpana.refgenome = self.refgenome
                tmpana.regionstr = regionstr
                tmpana.init()

                self.commands.extend(tmpana.getCommands())

                start = start + self.chunk

        return self.commands
Пример #12
0
    def setUp(self):
        """Recreate the test database, set the fastq fixture and open a session."""
        test_db = settings.TESTDBNAME

        # Drop any database file left over from a previous run.
        if FileUtils.fileExists(test_db):
            os.remove(test_db)

        settings.DBNAME = test_db

        init_database()

        self.input_files, self.input_types = ["testdata/FoxP2_SL167.fastq"], ['fastq']

        self.session = sessionmaker(bind=settings.ENGINE)()
Пример #13
0
    def testRunMummer(self):
        """Run Mummer through AnalysisRunner and check its output and delta file"""

        mummer = Mummer()

        self.assertTrue(mummer)
        self.assertTrue(AnalysisUtils.setInputFiles(mummer,self.input_files,['fasta','fasta']))

        runner = AnalysisRunner(mummer)

        self.assertTrue(runner.run())

        self.assertTrue(len(mummer.output_strings) == 1)

        # Membership test rather than ``.index(...) > 0``: index() raises
        # ValueError (a test error, not a failure) when the marker is missing
        # and the ``> 0`` comparison wrongly fails if the marker is at offset 0.
        output = mummer.output_strings[0].output_string
        self.assertTrue('4: FINISHING DATA' in output)

        self.assertTrue(FileUtils.fileExists('../testout/mummer.delta'))
Пример #14
0
    def getCommands(self):
        """Append the makeblastdb command for the first input file.

        Checks disk space and input files, stores the input's basename in
        ``self.basename`` and appends a single command string to
        ``self.commands``.  The database is written next to the input file
        (same directory, named after the basename).

        Returns:
            The list stored in ``self.commands``.

        Raises:
            Exception: if ``self.checkInputFiles()`` returns False.
        """
        self.checkDiskSpace()

        if self.checkInputFiles() == False:
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files))

        source = self.input_files[0]
        fileparts = FileUtils.getFileParts(source)

        self.basename = fileparts['basename']

        # Need to set dbtype somewhere

        command = "".join([
            self.makeblastdb,
            " -in ", source,
            " -input_type fasta -dbtype prot -title ", self.basename,
            " -parse_seqids -out ", fileparts['dirname'], "/", self.basename,
        ])

        print("Command %s" % command)

        self.commands.append(command)

        return self.commands
Пример #15
0
    def init(self):
        """Work out the FastQC output directory and register expected outputs.

        The directory name is derived from the first input file: the file stub
        for ``.fastq`` inputs, the file stub with ".fastq" removed for ``.gz``
        inputs, otherwise the basename, always followed by "_fastqc/".  It is
        stored in ``self.fastqc_dir`` and every entry of
        ``self.expected_output_filelist`` is registered under it with
        AnalysisUtils.addExpectedOutputFile.

        Raises:
            Exception: if there are no input files.
        """
        super(FastQCAnalysis, self).init()

        if len(self.input_files) == 0:
            raise Exception(
                "No input files for FastQCAnalysis module. Can't init")

        fileparts = FileUtils.getFileParts(self.input_files[0].input_file)
        extension = fileparts['fileext']

        if extension == ".fastq":
            stem = fileparts['filestub']
        elif extension == ".gz":
            stem = fileparts['filestub'].replace(".fastq", "")
        else:
            stem = fileparts['basename']

        report_dir = stem + "_fastqc/"
        self.fastqc_dir = report_dir

        for filename in self.expected_output_filelist:
            AnalysisUtils.addExpectedOutputFile(self, report_dir + filename)
Пример #16
0
    def tearDown(self):
        """Delete the Mummer delta file in ../testout if one exists."""
        leftover = '../testout/mummer.delta'

        if not FileUtils.fileExists(leftover):
            return

        os.remove(leftover)
Пример #17
0
    def init(self):
        """Require at least one input file and split the first one into parts.

        Raises:
            Exception: if there are no input files.
        """
        if len(self.input_files) == 0:
            raise Exception("No input files for BlastOutput6Parsermodule. Can't init")

        first_input = self.input_files[0]
        # The parts are not used further in the code visible here.
        fileparts = FileUtils.getFileParts(first_input)
Пример #18
0
    def testGetFilesInDirectory(self):
        """Listing /tmp/ through FileUtils should give at least one entry"""
        listing = FileUtils.getAllFilesInDirectory("/tmp/")
        entry_count = len(listing)

        self.assertTrue(entry_count > 0)
Пример #19
0
 def checkBinary(self,binfile):
     """Return what FileUtils.fileExists reports for *binfile*."""
     found = FileUtils.fileExists(binfile)
     return found
Пример #20
0
"""Unit test for Analysis.py"""

import os
import sys
import unittest
import logging

sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../")

from config import settings
from datamodel.FileUtils import FileUtils
from datamodel.database.DB import Analysis, init_database
from datamodel.database.AnalysisUtils import AnalysisUtils
from sqlalchemy.orm import sessionmaker

# Start each test run with a fresh log file.
if FileUtils.fileExists(settings.TESTLOGFILE):
    os.remove(settings.TESTLOGFILE)

formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler(settings.TESTLOGFILE, 'a')
fh.setFormatter(formatter)

log = logging.getLogger()  # root logger

# Remove all old handlers.  Iterate over a copy of the list: removeHandler()
# mutates log.handlers, and removing entries from the list being iterated
# skips some of the handlers.
for hdlr in log.handlers[:]:
    log.removeHandler(hdlr)

log.addHandler(fh)  # set the new handler


class DBAnalysisTest(unittest.TestCase
Пример #21
0
    def getCommands(self):
        """Build (once) and return the bowtie / samtools command list.

        If ``self.commands`` is already populated it is returned unchanged.
        Otherwise the command, expected-output and temp-file lists are reset,
        the bowtie and samtools binaries, the input files and the disk space
        are checked, and for every input file the following are added:

          * for ``.gz`` inputs, a gunzip command writing the uncompressed file
            into the working directory (that file is registered in
            ``self.temp_output_files``);
          * a bowtie | samtools view | samtools sort pipeline producing
            ``<working_dir>/<filestub>.bam``;
          * a samtools index command for that BAM file.

        ``<filestub>.bam`` and ``<filestub>.bam.bai`` are registered as
        expected output files.

        Returns:
            The list stored in ``self.commands``.

        Raises:
            Exception: if a binary or the input files are missing, or if
                ``self.param`` is None.  Errors raised while building the
                commands are logged and re-raised.
        """
        if self.commands and len(self.commands) > 0:
            return self.commands

        logging.info(" ========> Analysis %20s Getting commands" % (self.name))

        self.commands = []
        self.expected_output_files = []
        self.temp_output_files = []

        tmpdir = self.working_dir

        btbin = self.bowtiebindir + self.bowtiebinname
        stbin = self.samtoolsbindir + self.samtoolsbinname

        self.calculateSpaceNeeded()

        for binary in (btbin, stbin):
            if FileUtils.fileExists(binary) == False:
                raise Exception(
                    "Binary file [%s] doesn't exist = can't continue" % binary)

        if AnalysisUtils.checkInputFiles(self) == False:
            raise Exception("Input files [%s] don't exist = can't continue" %
                            (self.input_files))

        AnalysisUtils.checkDiskSpace(self)

        for fobj in self.input_files:
            f = fobj.input_file
            try:

                if f.endswith(".gz"):
                    # Strip only the trailing suffix; str.replace would also
                    # remove any ".gz" occurring earlier in the path.
                    fparts = FileUtils.getFileParts(f[:-len(".gz")])
                    unzipped = tmpdir + "/" + fparts['basename']

                    # Wrapped in AnalysisCommand like every other command in
                    # this list (it used to be appended as a bare string).
                    self.commands.append(
                        AnalysisCommand(command="gunzip -c " + f + " > " +
                                        unzipped))

                    # Register the file gunzip actually creates (in the working
                    # directory) for cleanup, not the path next to the input.
                    self.temp_output_files.append(unzipped)
                    f = unzipped

                fparts = FileUtils.getFileParts(f)
                fstub = fparts['filestub']

                samtoolsoutfile = tmpdir + "/" + fstub + ".bam"

                if self.param is None:
                    raise Exception(
                        "No parameters entered for bowtie = needs -x <genomeindex>"
                    )

                # Align, convert to BAM and sort in one pipeline; samtools sort
                # is given the output prefix <working_dir>/<filestub>.
                command1 = btbin + " " + self.param + " " + f + " | " + stbin + " view -bS - | " + stbin + " sort - " + tmpdir + "/" + fstub

                logging.info(" ========> Analysis %20s command 1 : %s" %
                             (self.name, command1))

                command2 = stbin + " index " + samtoolsoutfile

                logging.info(" ========> Analysis %20s command 2 : %s" %
                             (self.name, command2))

                for suffix in (".bam", ".bam.bai"):
                    self.expected_output_files.append(
                        AnalysisExpectedOutputFile(
                            expected_output_file=fstub + suffix))

                self.commands.append(AnalysisCommand(command=command1))
                self.commands.append(AnalysisCommand(command=command2))

            except Exception as e:
                logging.info(
                    " ========> Analysis %20s Failed building command list [%s]"
                    % (self.name, e))
                raise

        return self.commands