Exemplo n.º 1
0
def gather():
    """Collect the job's result files from ``scr_job`` into ``results``.

    The expected files are the two fragment library files
    (``aa<name>03_05.075_v1_3`` and ``aa<name>09_05.075_v1_3``) plus
    ``<name>.fasta``, ``<name>.psipred`` and ``<name>.psipred_ss2``.
    Fragment library files are column-filtered in place, copied, and then
    compressed with ``bzip2`` inside the results directory; the other files
    are copied unchanged.

    Reads the module-level names ``results``, ``name`` and ``scr_job``.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the
            patterns. On any failure the results directory is removed with
            ``paths.removerf`` before the error is re-raised.
    """
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    # Expected contents of the results directory, e.g.:
    #   aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #   aazj00109_05.075_v1_3.bz2  zj001.psipred
    # Raw string: same value as before, without relying on unknown escapes.
    aa_pattern = "aa" + name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(name + suf)

    try:
        for pattern in patterns:
            is_frag_lib = pattern == aa_pattern
            # Two spaces reproduce the legacy `print "Pattern ", pattern` output.
            print("Pattern  %s" % pattern)
            match = paths.find(pattern, scr_job)
            if not match:
                # String exceptions are rejected by Python >= 2.6 (TypeError),
                # so raise a real exception object instead.
                raise Exception("Missing file: nothing in %s matches %s"
                                % (scr_job, pattern))
            for path in match:
                if is_frag_lib:
                    print("Filtering Columns %s" % path)
                    # inplace=1 redirects stdout into the file being read, so
                    # each print rewrites the line as " " + the stripped
                    # first 47 characters of the original line.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Always restore stdout, even if filtering fails.
                        fileinput.close()
                print("Path  %s" % path)
                util.copy(path, job_results)
                if is_frag_lib:
                    # Compress the copy in the results directory.
                    dest = paths.join(job_results, paths.getFile(path))
                    util.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: clean up on any failure, then re-raise as-is.
        paths.removerf(job_results)
        raise
Exemplo n.º 2
0
 def _filter_fraglibs(self):
     """Reformat and compress the 3' and 9' fragment library files.

     The library files must already exist from the fragment picking run.
     Each file is rewritten in place so that every line becomes one space
     followed by the stripped first 47 characters of the original line
     (IBM formatting), and is then compressed with the ``bzip2`` binary
     (IBM formatting; the ``bz2`` library could be used instead).
     Afterwards ``self.frag3_lib``, ``self.frag9_lib`` and
     ``self.results_files`` are updated to the new ``.bz2`` names.

     Raises:
         Exception: if either fragment library attribute is still ``None``.
     """
     if self.frag3_lib is None or self.frag9_lib is None:
         raise Exception("Fragment library results files do not yet exist. Run Fragmentor.run(...) first.")

     for frag_lib in (self.frag3_lib, self.frag9_lib):
         # Filter frag library columns. inplace=1 redirects stdout into the
         # file being read, so each print rewrites the current line.
         try:
             for line in fileinput.input(frag_lib, inplace=1):
                 print(" " + line[:47].strip())
         finally:
             # Always restore stdout, even if filtering fails part-way.
             fileinput.close()

         # Bzip the frag library file; the instance variables are renamed
         # below once both files have been processed.
         cmd = "bzip2 {0}".format(frag_lib)
         system(cmd)

     # Update instance variable values to reflect bzip on fragment files.
     self.results_files.remove(self.frag3_lib)
     self.results_files.remove(self.frag9_lib)
     self.frag3_lib = self.frag3_lib + ".bz2"
     self.frag9_lib = self.frag9_lib + ".bz2"
     self.results_files.append(self.frag3_lib)
     self.results_files.append(self.frag9_lib)
Exemplo n.º 3
0
def gather():
    """Copy the job's result files from ``scr_job`` into ``results``.

    Looks for the two fragment library files
    (``aa<name>03_05.075_v1_3`` and ``aa<name>09_05.075_v1_3``) and for
    ``<name>.fasta``, ``<name>.psipred`` and ``<name>.psipred_ss2``.
    Fragment library files are column-filtered in place before being copied
    and are compressed with ``bzip2`` in the results directory; all other
    files are copied as they are.

    Reads the module-level names ``results``, ``name`` and ``scr_job``.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the
            patterns. Whatever the failure, the results directory is
            removed with ``paths.removerf`` and the error is re-raised.
    """
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    # Expected contents of the results directory, e.g.:
    #   aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
    #   aazj00109_05.075_v1_3.bz2  zj001.psipred
    # Raw string: identical value, no reliance on unknown escape sequences.
    aa_pattern = "aa" + name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(name + suf)

    try:
        for pattern in patterns:
            is_frag_lib = pattern == aa_pattern
            # Two spaces reproduce the legacy `print "Pattern ", pattern` output.
            print("Pattern  %s" % pattern)
            match = paths.find(pattern, scr_job)
            if not match:
                # A string cannot be raised on Python >= 2.6 (it becomes a
                # TypeError), so raise a proper exception object.
                raise Exception("Missing file: nothing in %s matches %s"
                                % (scr_job, pattern))
            for path in match:
                if is_frag_lib:
                    print("Filtering Columns %s" % path)
                    # With inplace=1 stdout is redirected into the file being
                    # read; each print replaces the line with " " + the
                    # stripped first 47 characters.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Restore stdout even when filtering raises.
                        fileinput.close()
                print("Path  %s" % path)
                util.copy(path, job_results)
                if is_frag_lib:
                    # Compress the copied library inside the results directory.
                    dest = paths.join(job_results, paths.getFile(path))
                    util.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: clean up on any failure, then re-raise as-is.
        paths.removerf(job_results)
        raise
Exemplo n.º 4
0
def gather():
    """Collect the job's result files from ``scr_job_dir`` into ``results_dir``.

    The results directory ends up holding the 3' and 9' fragment libraries
    plus the fasta, psipred and psipred_ss2 files, e.g.::

        aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
        aazj00109_05.075_v1_3.bz2  zj001.psipred

    Fragment library files are column-filtered in place, copied, and then
    compressed with ``bzip2`` in the results directory; the other files are
    copied unchanged.

    Reads the module-level names ``code``, ``task_id``, ``scr_job_dir`` and
    ``results_dir``.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the
            patterns. On any failure the results directory is removed with
            ``paths.removerf`` before the error is re-raised.
    """
    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Set "patterns" for results files. Raw string: same value as before,
    # without relying on unknown escape sequences.
    aa_pattern = "aa" + code + task_id + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(code + task_id + suf)

    try:
        for pattern in patterns:
            is_frag_lib = pattern == aa_pattern
            # Two spaces reproduce the legacy `print "Pattern ", pattern` output.
            print("Pattern  %s" % pattern)

            # Find the results file in scr_job_dir.
            match = paths.find(pattern, scr_job_dir)
            if not match:
                # String exceptions are rejected by Python >= 2.6 (TypeError),
                # so raise a real exception object instead.
                raise Exception("Missing file: nothing in %s matches %s"
                                % (scr_job_dir, pattern))

            # Copy all matching results files from scr_job_dir to results_dir.
            for path in match:
                if is_frag_lib:
                    print("Filtering Columns of file %s" % path)
                    # fileinput with inplace=1 backs the file up and directs
                    # stdout to it, so each print rewrites the line as
                    # " " + the stripped first 47 characters.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Always restore stdout, even if filtering fails.
                        fileinput.close()
                print("Path  %s" % path)

                # Copy the result file to results_dir.
                utilities.copy(path, results_dir)

                # A frag library is additionally bzipped in results_dir.
                if is_frag_lib:
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: if gathering fails for any reason, delete the
        # results_dir and let the original error propagate.
        paths.removerf(results_dir)
        raise
Exemplo n.º 5
0
def gather():
    """Copy the job's result files from ``scr_job_dir`` into ``results_dir``.

    Expected contents of the results directory afterwards (3' library,
    9' library, fasta, psipred and psipred_ss2 files), e.g.::

        aazj00103_05.075_v1_3.bz2  zj001.fasta    zj001.psipred_ss2
        aazj00109_05.075_v1_3.bz2  zj001.psipred

    Fragment library files are column-filtered in place before being copied
    and are compressed with ``bzip2`` in the results directory; every other
    file is copied as it is.

    Reads the module-level names ``code``, ``task_id``, ``scr_job_dir`` and
    ``results_dir``.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the
            patterns. Whatever the failure, the results directory is
            removed with ``paths.removerf`` and the error is re-raised.
    """
    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Set "patterns" for results files. Raw string: identical value, no
    # reliance on unknown escape sequences.
    aa_pattern = "aa" + code + task_id + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(code + task_id + suf)

    try:
        for pattern in patterns:
            is_frag_lib = pattern == aa_pattern
            # Two spaces reproduce the legacy `print "Pattern ", pattern` output.
            print("Pattern  %s" % pattern)

            # Find the results file in scr_job_dir.
            match = paths.find(pattern, scr_job_dir)
            if not match:
                # A string cannot be raised on Python >= 2.6 (it becomes a
                # TypeError), so raise a proper exception object.
                raise Exception("Missing file: nothing in %s matches %s"
                                % (scr_job_dir, pattern))

            # Copy all matching results files from scr_job_dir to results_dir.
            for path in match:
                if is_frag_lib:
                    print("Filtering Columns of file %s" % path)
                    # With inplace=1 the file is backed up and stdout is
                    # directed to it; each print replaces the line with
                    # " " + the stripped first 47 characters.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Restore stdout even when filtering raises.
                        fileinput.close()
                print("Path  %s" % path)

                # Copy the result file to results_dir.
                utilities.copy(path, results_dir)

                # A frag library is additionally bzipped in results_dir.
                if is_frag_lib:
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: if gathering fails for any reason, delete the
        # results_dir and let the original error propagate.
        paths.removerf(results_dir)
        raise
Exemplo n.º 6
0
def run_blast(query_file):
    """Run the configured BLAST executable on ``query_file``.

    The command line is assembled from the module-level settings
    ``blast_exe``, ``source_db``, ``e_val``, ``results``, ``alignments``,
    ``processors`` and ``output_mode``; the ``-o`` output file is
    ``query_file + '.blast.xml'``.

    Raises:
        Exception: if ``query_file`` is not an existing file. An exception
            raised by ``system`` is printed, reported and re-raised.
    """
    if not isfile(query_file):
        message = "Given blast query file {0} not a valid file".format(query_file)
        raise Exception(message)

    xml_out = query_file + '.blast.xml'
    template = "{0} -i {1} -d {2} -e {3} -v {4} -b {5} -a {6} -m {7} -o {8}"
    blast_cmd = template.format(blast_exe, query_file, source_db, e_val, results,
                                alignments, processors, output_mode, xml_out)
    print("Blast command: {0}".format(blast_cmd))

    try:
        system(blast_cmd)
    except Exception as err:
        print(err)
        print("Blasting query file {0} against DB {1} failed".format(query_file, source_db))
        raise
Exemplo n.º 7
0
def run_blast(query_file):
    """Blast ``query_file`` against ``source_db``.

    Builds one command string from the module-level BLAST settings
    (``blast_exe``, ``source_db``, ``e_val``, ``results``, ``alignments``,
    ``processors``, ``output_mode``) and hands it to ``system``. The ``-o``
    argument is the query file name with ``.blast.xml`` appended.

    Raises:
        Exception: when ``query_file`` is not an existing file. Exceptions
            coming out of ``system`` are printed and then re-raised.
    """
    if not isfile(query_file):
        raise Exception(
            "Given blast query file {0} not a valid file".format(query_file))

    blast_outfile = query_file + '.blast.xml'
    # Positional values for the command template, in placeholder order.
    settings = (blast_exe, query_file, source_db, e_val, results,
                alignments, processors, output_mode, blast_outfile)
    blast_cmd = "{0} -i {1} -d {2} -e {3} -v {4} -b {5} -a {6} -m {7} -o {8}".format(*settings)
    print("Blast command: {0}".format(blast_cmd))

    try:
        system(blast_cmd)
    except Exception as failure:
        print(failure)
        failure_note = "Blasting query file {0} against DB {1} failed".format(
            query_file, source_db)
        print(failure_note)
        raise
Exemplo n.º 8
0
 def _run_frag(self, nohoms, fasta):
     """Run the fragment picking script on a fasta file.

     Args:
         nohoms: boolean; when true, ``-nohoms`` is added to the script's
             options (``-nosam -verbose`` are always passed).
         fasta: fasta input filename handed to the script.

     Raises:
         Exception: if ``self.frag_script`` is not an existing file.
     """
     # Check to see if script given in config file exists.
     if not os.path.isfile(self.frag_script):
         # NOTE(review): the original referenced the undefined name
         # `cofig_file` (NameError on this path); `config_file` is assumed
         # to be the intended name -- confirm it is in scope here.
         # The trailing space after "is not" fixes "is notaccessible.".
         raise Exception("Fragment picking {0} script specified in config file ({1}) is not "
                         "accessible.".format(self.frag_script, config_file))

     # Run the given script on fasta file. Current options: ?-nohoms -nosam -verbose.
     options = "-nohoms -nosam -verbose" if nohoms else "-nosam -verbose"
     cmd = "{0} {1} {2}".format(self.frag_script, options, fasta)

     print("Fragmentor: executing frag command {0}".format(cmd))
     system(cmd)
Exemplo n.º 9
0
def runScript(no_homs=False):
    """Export the tool/database directories and run the fragmentation script.

    Each directory below is passed to ``paths.existsOrFail`` and then
    published through ``os.environ`` under these names:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR (SAM Secondary Structure Prediction)

    The script ``paths.join(home, "scripts", frag_script)`` is then run from
    ``scr_job_dir`` with ``-verbose -nosam`` on ``paths.getFile(fasta_file)``.

    Args:
        no_homs: when true, ``-nohoms`` is appended to the script invocation.
    """
    utilities.debug("Running fragmentation script")

    required_dirs = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir,
    }

    for var_name, directory in required_dirs.items():
        # Validate first, then export: a failing check stops before the
        # variable is added to the process environment.
        paths.existsOrFail(directory)
        os.environ[var_name] = directory

    launcher = paths.join(home, "scripts", frag_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    fasta = paths.getFile(fasta_file)
    # Disabled alternative kept from the original (runs without -nosam):
    #cmd = "cd %s && %s -verbose %s" % (scr_job_dir, script, fasta)
    utilities.system("cd %s && %s -verbose -nosam %s" % (scr_job_dir, launcher, fasta))
Exemplo n.º 10
0
def runScript(no_homs=False):
    """Set up the environment for, and run, the fragmentation script.

    The script reads its tool and database locations from these environment
    variables, which are set here from ``tools_dir`` and ``scr_dir``:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR (SAM Secondary Structure Prediction)

    Every location goes through ``paths.existsOrFail`` before it is stored
    in ``os.environ``. The command is executed with ``utilities.system``
    after a ``cd`` into ``scr_job_dir``.

    Args:
        no_homs: when true, the script is invoked with ``-nohoms``.
    """
    utilities.debug("Running fragmentation script")

    locations = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir,
    }

    for variable in locations:
        location = locations[variable]
        # Check the location before exporting it.
        paths.existsOrFail(location)
        # os.environ is the system environment; assigning adds the variable.
        os.environ[variable] = location

    base_script = paths.join(home, "scripts", frag_script)
    script = "%s -nohoms" % base_script if no_homs else base_script
    fasta = paths.getFile(fasta_file)

    # Disabled alternative kept from the original (runs without -nosam):
    #cmd = "cd %s && %s -verbose %s" % (scr_job_dir, script, fasta)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job_dir, script, fasta)
    utilities.system(command)
Exemplo n.º 11
0
def runScript():
    """Export the tool/database directories and run the fragmentation script.

    Each directory below is passed to ``paths.existsOrFail`` and then
    published through ``os.environ`` under these names:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR (SAM Secondary Structure Prediction)

    The script ``paths.join(home, "scripts", pl_script)`` is then run from
    ``scr_job`` with ``-verbose -nosam`` on ``paths.getFile(scr_fasta)``.
    ``-nohoms`` is appended when the name ``no_homs`` (not a parameter of
    this function; resolved from the enclosing scope) is true.
    """
    util.debug("Running fragmentation script")

    required_dirs = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware,
    }

    for var_name, directory in required_dirs.items():
        # Validate first, then export: a failing check stops before the
        # variable is added to the process environment.
        paths.existsOrFail(directory)
        os.environ[var_name] = directory

    launcher = paths.join(home, "scripts", pl_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    fasta = paths.getFile(scr_fasta)
    # Disabled alternative kept from the original (runs without -nosam):
    #cmd = "cd %s && %s -verbose %s" % (scr_job, script, fasta)
    util.system("cd %s && %s -verbose -nosam %s" % (scr_job, launcher, fasta))
Exemplo n.º 12
0
def runScript():
    """Set up the environment for, and run, the fragmentation script.

    The script reads its tool and database locations from these environment
    variables, which are set here from ``shareware`` and ``scr``:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR (SAM Secondary Structure Prediction)

    Every location goes through ``paths.existsOrFail`` before it is stored
    in ``os.environ``. The command is executed with ``util.system`` after a
    ``cd`` into ``scr_job``; ``-nohoms`` is added when the name ``no_homs``
    (not a parameter of this function) is true.
    """
    util.debug("Running fragmentation script")

    locations = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware,
    }

    for variable in locations:
        location = locations[variable]
        # Make sure the location passes the existence check...
        paths.existsOrFail(location)
        # ...then put it into the system environment.
        os.environ[variable] = location

    base_script = paths.join(home, "scripts", pl_script)
    script = "%s -nohoms" % base_script if no_homs else base_script
    fasta = paths.getFile(scr_fasta)

    # Disabled alternative kept from the original (runs without -nosam):
    #cmd = "cd %s && %s -verbose %s" % (scr_job, script, fasta)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job, script, fasta)
    util.system(command)
Exemplo n.º 13
0
def removerf(path):
    """Run ``rm -rf`` on ``path`` through ``hpf.utilities.system``.

    NOTE(review): ``path`` is interpolated into the command string without
    quoting, so whitespace or shell metacharacters in it are interpreted by
    whatever executes the command -- confirm callers only pass plain paths.
    """
    # Imported locally, as in the original, rather than at module level.
    from hpf.utilities import system as run_command

    command = "rm -rf %s" % path
    run_command(command)