Пример #1
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate the feature alignment run.

        Checks outputs, persists stdout to the workdir, and moves pyprophet
        .tr files into the workdir so they survive a rescue.

        Raises RuntimeError when no shared peptides were found or when a run
        does not have exactly one .tr file.
        """
        # This debug line in stdout is the symptom of an empty alignment.
        if 'max_rt_diff = self._stdev_max_rt_per_run * tr_data.getStdev(source, target)' in stdout:
            raise RuntimeError("No peptides found which are shared between all runs. Try to increase 'alignment_score'.")
        validation.check_stdout(log, stdout)
        validation.check_exitcode(log, exit_code)
        validation.check_file(log, info['ALIGNMENT_TSV'])
        validation.check_file(log, info['ALIGNMENT_YAML'])

        out2log = os.path.join(info[Keys.WORKDIR], "feature_alignment.out.txt")
        # context manager: close the handle even if the write fails
        with open(out2log, "w") as f:
            f.write(stdout)
        info["ALIGNER_STDOUT"] = out2log

        # Move out .tr files of pyprophet to be rescue safe
        info["TRAFO_FILES"] = []
        for fil in info["MPROPHET_TSV"]:
            trfile = glob.glob(os.path.dirname(fil) + "/*.tr")
            if len(trfile) != 1:
                # old message claimed "more than one" even when none were found
                raise RuntimeError("Expected exactly one .tr file for %s, found %d" % (fil, len(trfile)))
            basename = os.path.basename(trfile[0])
            # use Keys.WORKDIR consistently (the stdout dump above does too)
            tgt = os.path.join(info[Keys.WORKDIR], basename)
            shutil.move(trfile[0], tgt)
            log.debug("Moved tr file %s into WORKDIR" % basename)
            info["TRAFO_FILES"].append(tgt)

        return info
Пример #2
0
 def validate_run(self, log, info, run_code, out):
     """Fail fast when the fasta lacks decoys, then run the generic checks."""
     if "No decoys with label DECOY_ were found" in out:
         raise RuntimeError("No DECOY_s found in fasta. Please use other fasta!")
     for check, value in ((validation.check_stdout, out),
                          (validation.check_exitcode, run_code)):
         check(log, value)
     validation.check_xml(log, info[Keys.PEPXML])
     return info
Пример #3
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate the exit code, then verify every APMS output file exists."""
        validation.check_exitcode(log, exit_code)
        for apms_file in info['APMS_OUT']:
            validation.check_file(log, apms_file)
        return info
Пример #4
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate stdout and exit code, then the splib, TSV and TraML outputs."""
     validation.check_stdout(log, stdout)
     validation.check_exitcode(log, exit_code)
     for key, check in (('SPLIB', validation.check_file),
                        ('TSV', validation.check_file),
                        ('TRAML', validation.check_xml)):
         check(log, info[key])
     return info
Пример #5
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate the exit code, then check each APMS output file in turn."""
        validation.check_exitcode(log, exit_code)
        for output_path in info['APMS_OUT']:
            validation.check_file(log, output_path)
        return info
 def validate_run(self, log, info, run_code, out):
     """Reject centroided input (LFQ needs profile data), then run generic checks."""
     centroid_marker = "OpenMS peak type estimation indicates that this is not profile data!"
     if any(centroid_marker in line for line in out.splitlines()):
         raise RuntimeError("Found centroid data but LFQ must be run on profile mode data!")
     validation.check_stdout(log, out)
     validation.check_exitcode(log, run_code)
     validation.check_xml(log, info['FEATUREXML'])
     return info
Пример #7
0
 def validate_run(self, log, info, run_code, out):
     """Abort when the fasta has no decoys, otherwise run the standard checks."""
     if "No decoys with label DECOY_ were found" in out:
         raise RuntimeError(
             "No DECOY_s found in fasta. Please use other fasta!")
     for check, value in ((validation.check_stdout, out),
                          (validation.check_exitcode, run_code)):
         check(log, value)
     validation.check_xml(log, info[Keys.PEPXML])
     return info
Пример #8
0
    def validate_run(self, log, info, exit_code, out):
        """Validate exit code, fail when no valid model was produced, check pepXML."""
        validation.check_exitcode(log, exit_code)
        if 'Valid models = 0' in out:
            raise RuntimeError('No valid model found')
        validation.check_xml(log, info[Keys.PEPXML])
        return info
Пример #9
0
    def validate_run(self, log, info, exit_code, out):
        """Check the exit code, then bail out if the tool reported zero valid models."""
        validation.check_exitcode(log, exit_code)
        if 'Valid models = 0' in out:
            raise RuntimeError('No valid model found')
        validation.check_xml(log, info[Keys.PEPXML])
        return info
Пример #10
0
    def validate_run(self, log, info, exit_code, stdout):
        """Translate known iProphet failure modes into RuntimeErrors, then validate."""
        # exit code -8: crash observed when the upstream search found too few peptides
        if exit_code == -8:
            raise RuntimeError("iProphet failed most probably because too few peptides were found in the search before")
        unreadable = [line for line in stdout.splitlines() if 'fin: error opening' in line]
        if unreadable:
            raise RuntimeError("Could not read the input file " + unreadable[0])
        validation.check_exitcode(log, exit_code)
        validation.check_xml(log, info[Keys.PEPXML])
        return info
Пример #11
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate exit code, scan stdout for known ProteinProphet failures, check protXML."""
        validation.check_exitcode(log, exit_code)

        error_markers = (
            'Error:',
            'did not find any InterProphet results in input data!',
            'no data - quitting',
            'WARNING: No database referenced',
        )
        for msg in error_markers:
            if msg in stdout:
                raise RuntimeError('ProteinProphet error [%s]' % msg)

        validation.check_xml(log, info[Keys.PROTXML])
        return info
Пример #12
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate a Comet search: map known stdout failure patterns to
     RuntimeErrors, then run the generic stdout/exitcode/pepXML checks."""
     if "Warning - no spectra searched" in stdout:
         raise RuntimeError("No spectra in mzXML!")
     if "CometMemAlloc" in stdout:
         # print to stdout to reach gUSE rescue functionality. ugly, no?
         # print() call form is Python 2/3 compatible (old print statement was not)
         print("MemoryError")
         raise RuntimeError("The job run out of RAM!")
     check_stdout(log, stdout)
     check_exitcode(log, exit_code)
     check_xml(log, info[Keys.PEPXML])
     return info
Пример #13
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate the copy: map cp error text to RuntimeErrors, then check the target."""
        log.debug("Cp validation")
        # self checked
        for marker, message in (
                ("No such file", "Inputfile not found"),
                ("Permission denied", "Was not allowed to read inputfile. Need more rights")):
            if marker in stdout:
                raise RuntimeError(message)
        # validation util
        validation.check_file(log, info["COPY"])
        validation.check_exitcode(log, exit_code)
        return info
Пример #14
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate the cp run by scanning stdout for errors, then checking the copy."""
        log.debug("Cp validation")
        #self checked
        failures = (("No such file", "Inputfile not found"),
                    ("Permission denied",
                     "Was not allowed to read inputfile. Need more rights"))
        for marker, message in failures:
            if marker in stdout:
                raise RuntimeError(message)
        #validation util
        validation.check_file(log, info['COPY'])
        validation.check_exitcode(log, exit_code)
        return info
Пример #15
0
    def validate_run(self, log, info, exit_code, stdout):
        """Check exit code, look for known ProteinProphet error strings, check protXML."""
        validation.check_exitcode(log, exit_code)

        known_errors = (
            'Error:',
            'did not find any InterProphet results in input data!',
            'no data - quitting',
            'WARNING: No database referenced',
        )
        for msg in known_errors:
            if msg in stdout:
                raise RuntimeError('ProteinProphet error [%s]' % msg)

        validation.check_xml(log, info[Keys.PROTXML])
        return info
Пример #16
0
    def validate_run(self, log, info, exit_code, stdout):
        """Run generic checks, verify each quantified file exists, and record md5 sums."""
        if "ERROR [root] Parsing </precursorMz> failed. scan number" in stdout:
            raise ValueError("the chosen mzXML %s file contains "
                             "MS2 spectra without precursor information" %
                             info.get("MZXML"))

        validation.check_stdout(log, stdout)
        validation.check_exitcode(log, exit_code)
        info["MD5_SUMS"] = []
        for q_file in info["Q_FILES"]:
            validation.check_file(log, q_file)
            # sibling .md5 file: first whitespace-separated token is the checksum
            with open(q_file + ".md5", "r") as fh:
                checksum = fh.read().split(" ")[0].strip()
            info["MD5_SUMS"].append(checksum)
        return info
Пример #17
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate the run, then rewrite the pepXML without spectrumNativeID attributes."""
        check_exitcode(log, exit_code)
        check_xml(log, info[Keys.PEPXML])

        #https://groups.google.com/forum/#!topic/spctools-discuss/dV8LSaE60ao
        pepxml = info[Keys.PEPXML]
        broken = pepxml + '.broken'
        shutil.move(pepxml, broken)
        with open(pepxml, 'w') as fout:
            with open(broken) as src:
                for line in src:
                    if 'spectrumNativeID' in line:
                        line = re.sub('spectrumNativeID="[^"]*"', '', line)
                    fout.write(line)

        return info
Пример #18
0
    def validate_run(self, log, info, exit_code, stdout):
        """Run the standard checks, then collect an md5 sum for each quantified file."""
        if "ERROR [root] Parsing </precursorMz> failed. scan number" in stdout:
            raise ValueError("the chosen mzXML %s file contains "
                    "MS2 spectra without precursor information" % info.get("MZXML")
                    )

        validation.check_stdout(log, stdout)
        validation.check_exitcode(log, exit_code)
        info["MD5_SUMS"] = []
        for path in info["Q_FILES"]:
            validation.check_file(log, path)
            # companion .md5 file holds "<sum> <name>"; keep only the sum
            with open(path + ".md5", "r") as fh:
                info["MD5_SUMS"].append(fh.read().split(" ")[0].strip())
        return info
Пример #19
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate a Comet search; on a bad exit code hint that the mzXML may be gone."""
     if "Warning - no spectra searched" in stdout:
         raise RuntimeError("No spectra in mzXML!")
     if "CometMemAlloc" in stdout:
         # print to stdout to reach gUSE rescue functionality. ugly, no?
         # print() call form is Python 2/3 compatible (old print statement was not)
         print("MemoryError")
         raise RuntimeError("The job run out of RAM!")
     check_stdout(log, stdout)
     if exit_code:
         log.warn("exit_code is %s", exit_code)
         mzxml = info[Keys.MZXML]
         log.warn("maybe the input file %s does not exist any more. check this !" % mzxml)
     check_exitcode(log, exit_code)
     check_xml(log, info[Keys.PEPXML])
     return info
Пример #20
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate a SpectraST run and, when RT alignment was requested,
        verify that iRT calibration succeeded.

        Raises RuntimeError when iRT calibration failed (missing iRT peptides,
        unreadable landmark/rtkit file, or regression R^2 below
        info['RSQ_THRESHOLD']) or when SpectraST did not report a clean finish.
        """
        if info['RUNRT'] == 'True':
            # Spectrast imports sample *without error* when no iRTs are found. Thus look
            # for "Comment:" entries without iRT= attribute in splib
            notenough = set()
            with open(info['SPLIB']) as splib:
                for line in splib:
                    if "Comment:" in line and not "iRT=" in line:
                        samplename = re.search(r"RawSpectrum=([^\.]*)\.", line).group(1)
                        notenough.add(samplename)
            if notenough:
                log.error("No/not enough iRT peptides found in sample(s): " + ", ".join(notenough))

            # when irt.txt not readable: PEPXML IMPORT: Cannot read landmark table.
            # No RT normalization will be performed.
            rtcalibfailed = False
            with open(info['SPLOG']) as splog:
                for line in splog:
                    if "Cannot read landmark table" in line:
                        log.error("Problem with reading rtkit file %s!" % info['RTKIT'])
                        rtcalibfailed = True

            # Parse logfile to see whether R^2 is high enough. Example log for failed calibration (line 3 only when <0.9):
            # PEPXML IMPORT: RT normalization by linear regression. Found 10 landmarks in MS run "CHLUD_L110830_21".
            # PEPXML_IMPORT: Final fitted equation: iRT = (rRT - 1758) / (8.627); R^2 = 0.5698; 5 outliers removed.
            # ERROR PEPXML_IMPORT: R^2 still too low at required coverage. No RT normalization performed. Consider...
            rsqlow = False
            prevline = ""  # fix: was unbound if the very first line matched below
            with open(info['SPLOG']) as splog:
                for line in splog:
                    if "Final fitted equation:" in line:
                        # sample name is the last token of the preceding log line
                        samplename = prevline.strip().split(" ")[-1]
                        rsq = line.split()[-4].replace(";", "")
                        if float(rsq) < float(info['RSQ_THRESHOLD']):
                            log.error(
                                "R^2 of %s is below threshold of %s for %s" % (rsq, info['RSQ_THRESHOLD'], samplename))
                            rsqlow = True
                        else:
                            log.debug("R^2 of %s is OK for %s" % (rsq, samplename))
                    else:
                        prevline = line

            # Raise only here to have all errors shown
            if rsqlow or rtcalibfailed or notenough:
                raise RuntimeError("Error in iRT calibration.")

        # Double check "Spectrast finished ..."
        if not " without error." in stdout:
            raise RuntimeError("SpectraST finished with some error!")

        validation.check_exitcode(log, exit_code)
        validation.check_file(log, info['SPLIB'])
        return info
Пример #21
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate a Comet search; warn about a possibly missing mzXML on bad exit codes."""
     if "Warning - no spectra searched" in stdout:
         raise RuntimeError("No spectra in mzXML!")
     if "CometMemAlloc" in stdout:
         # print to stdout to reach gUSE rescue functionality. ugly, no?
         # print() call form is Python 2/3 compatible (old print statement was not)
         print("MemoryError")
         raise RuntimeError("The job run out of RAM!")
     check_stdout(log, stdout)
     if exit_code:
         log.warn("exit_code is %s", exit_code)
         mzxml = info[Keys.MZXML]
         log.warn(
             "maybe the input file %s does not exist any more. check this !"
             % mzxml)
     check_exitcode(log, exit_code)
     check_xml(log, info[Keys.PEPXML])
     return info
Пример #22
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate mProphet output: the filtered TSV must exist; collect optional stats files."""
        validation.check_stdout(log, stdout)
        validation.check_exitcode(log, exit_code)

        workdir = info[Keys.WORKDIR]
        stem = os.path.splitext(os.path.basename(info['FEATURETSV']))[0]
        base = os.path.join(workdir, stem)
        info['MPROPHET_TSV'] = base + "_with_dscore_filtered.csv"
        validation.check_file(log, info['MPROPHET_TSV'])

        suffixes = ("_full_stat.csv", "_scorer.bin", "_weights.txt", "_report.pdf",
                    "_dscores_top_target_peaks.txt", "_dscores_top_decoy_peaks.txt")
        prophet_stats = [base + suffix for suffix in suffixes
                         if os.path.exists(base + suffix)]

        if prophet_stats:
            info['MPROPHET_STATS'] = prophet_stats
        return info
Пример #23
0
    def validate_run(self, log, info, exit_code, stdout):
        """Reject DDA samples and failed iRT calibration, then check the feature TSV."""
        for line in stdout.splitlines():
            # Example: "Determined there to be 35792 SWATH windows and in total 6306 MS1 spectra"
            if 'Determined there to be' in line:
                if float(line.split()[4]) > 128:
                    raise RuntimeError('This is a DDA sample, not SWATH!')
            if 'is below limit of ' in line:
                raise RuntimeError("iRT calibration failed: " + line)

        # validation.check_stdout(log,stdout)
        validation.check_exitcode(log, exit_code)
        validation.check_file(log, info['FEATURETSV'])
        if os.path.getsize(info['FEATURETSV']) < 1000:
            raise RuntimeError("No peak found, output is empty!")
        if 'CHROM_MZML' in info:
            #don't use check_xml() because of .gz
            validation.check_file(log, info['CHROM_MZML'])

        return info
Пример #24
0
    def validate_run(self, log, info, exit_code, stdout):
        """Scan stdout for DDA/iRT failures, then validate the feature TSV output."""
        for line in stdout.splitlines():
            # e.g. "Determined there to be 35792 SWATH windows and in total 6306 MS1 spectra"
            if 'Determined there to be' in line:
                swath_count = float(line.split()[4])
                if swath_count > 128:
                    raise RuntimeError('This is a DDA sample, not SWATH!')
            if 'is below limit of ' in line:
                raise RuntimeError("iRT calibration failed: " + line)

        # validation.check_stdout(log,stdout)
        validation.check_exitcode(log, exit_code)
        validation.check_file(log, info['FEATURETSV'])
        # a near-empty TSV means the tool produced headers only
        if os.path.getsize(info['FEATURETSV']) < 1000:
            raise RuntimeError("No peak found, output is empty!")
        if 'CHROM_MZML' in info:
            #don't use check_xml() because of .gz
            validation.check_file(log, info['CHROM_MZML'])

        return info
Пример #25
0
    def validate_run(self, log, info, exit_code, out):
        """Validate an openbis download: detect known failures, confirm every file
        arrived, then record the downloaded paths under the executable's key."""
        if "TypeError: expected str or unicode but got <type 'NoneType'>" in out:
            raise RuntimeError("Dataset is archived. Please unarchive first!")

        if "traceback" in out.lower():
            raise RuntimeError("traceback when talking to openbis: %s" % out)

        validation.check_exitcode(log, exit_code)

        # each result-file line ends with a downloaded path; collect absent ones
        missing = []
        for line in open(self.rfile):
            fields = line.strip().rsplit(None, 1)
            if len(fields) == 2 and not os.path.exists(fields[1]):
                missing.append(fields[1])

        executable = info[Keys.EXECUTABLE]
        if missing:
            for path in missing:
                log.error("%s failed for %s" % (executable, path))
            raise Exception("files which should be extracted from openbis are missing")

        #KEY where to store downloaded file paths
        key = self.default_keys[executable]
        #VALUE is a list of files or the mzXMLlink
        dsfls = []
        with open(self.rfile) as f:
            for downloaded in (line.strip() for line in f):
                ds, fl = downloaded.split("\t")
                if ds == info[Keys.DATASET_CODE] or ds == info['EXPERIMENT']:
                    dsfls.append(fl)

        #MZXML is expected only 1
        if key == 'MZXML':
            dsfls = dsfls[0]

        log.debug("Adding %s to %s" % (dsfls, key))
        info[key] = dsfls
        return info
Пример #26
0
    def validate_run(self, log, info, exit_code, out):
        """Check the openbis download for archived datasets, tracebacks and
        missing files, then store the downloaded paths in *info*."""
        if "TypeError: expected str or unicode but got <type 'NoneType'>" in out:
            raise RuntimeError("Dataset is archived. Please unarchive first!")

        if "traceback" in out.lower():
            raise RuntimeError("traceback when talking to openbis: %s" % out)

        validation.check_exitcode(log, exit_code)

        missing = []
        for line in open(self.rfile):
            fields = line.strip().rsplit(None, 1)
            # the last whitespace-separated token of each line is the local path
            if len(fields) == 2 and not os.path.exists(fields[1]):
                missing.append(fields[1])

        executable = info[Keys.EXECUTABLE]
        if missing:
            for path in missing:
                log.error("%s failed for %s" % (executable, path))
            raise Exception(
                "files which should be extracted from openbis are missing")

        #KEY where to store downloaded file paths
        key = self.default_keys[executable]
        #VALUE is a list of files or the mzXMLlink
        dsfls = []
        with open(self.rfile) as f:
            for downloaded in (line.strip() for line in f):
                ds, fl = downloaded.split("\t")
                if ds == info[Keys.DATASET_CODE] or ds == info['EXPERIMENT']:
                    dsfls.append(fl)

        #MZXML is expected only 1
        if key == 'MZXML':
            dsfls = dsfls[0]

        log.debug("Adding %s to %s" % (dsfls, key))
        info[key] = dsfls
        return info
Пример #27
0
    def validate_run(self, log, info, exit_code, stdout):
        """Validate pyprophet output: require the filtered TSV, gather optional stats."""
        validation.check_stdout(log, stdout)
        validation.check_exitcode(log, exit_code)

        workdir = info[Keys.WORKDIR]
        stem = os.path.splitext(os.path.basename(info['FEATURETSV']))[0]
        base = os.path.join(workdir, stem)
        info['MPROPHET_TSV'] = base + "_with_dscore_filtered.csv"
        validation.check_file(log, info['MPROPHET_TSV'])

        optional_suffixes = (
            "_full_stat.csv", "_scorer.bin", "_weights.txt", "_report.pdf",
            "_dscores_top_target_peaks.txt", "_dscores_top_decoy_peaks.txt")
        prophet_stats = [base + suffix for suffix in optional_suffixes
                         if os.path.exists(base + suffix)]

        if prophet_stats:
            info['MPROPHET_STATS'] = prophet_stats
        return info
Пример #28
0
    def validate_run(self, log, info, run_code, out):
        """Validate the exit code and locate the FASTA/TraML database files
        announced in self.rfile (unless the database comes from BioDB)."""
        validation.check_exitcode(log, run_code)

        if info["DB_SOURCE"] == "BioDB":
            log.info("Database remains " + info["DBASE"])
            return info

        found = False
        with open(self.rfile) as f:
            for line in f.readlines():
                lowered = line.lower()
                #if info['DB_TYPE'].lower in line.lower():
                if '.fasta' in lowered or '.txt' in lowered:
                    info['DBASE'] = line.split()[1]
                    log.info("FASTA database file found " + info["DBASE"])
                    found = True
                if '.traml' in lowered:
                    info['TRAML'] = line.split()[1]
                    log.info("TraML database file found " + info["TRAML"])
                    found = True
        if not found:
            raise RuntimeError("No FASTA ('*.fasta') or TraML ('*.traml') database file found in the dataset files: %s" % self.rfile)

        return info
Пример #29
0
    def validate_run(self, log, info, exit_code, out):
        """Validate an openbis fetch and record the downloaded file paths in *info*."""
        if "TypeError: expected str or unicode but got <type 'NoneType'>" in out:
            raise RuntimeError("Dataset is archived. Please unarchive first!")

        validation.check_exitcode(log, exit_code)

        # KEY where to store downloaded file paths
        default_keys = {"getmsdata": "MZXML", "getexperiment": "SEARCH", "getdataset": "DSSOUT"}
        key = default_keys[info[Keys.EXECUTABLE]]
        # VALUE is a list of files or the mzXMLlink
        dsfls = []
        with open(self.rfile) as f:
            for downloaded in (line.strip() for line in f):
                ds, fl = downloaded.split("\t")
                if ds == info[Keys.DATASET_CODE] or ds == info["EXPERIMENT"]:
                    dsfls.append(fl)

        # MZXML is expected only 1
        if key == "MZXML":
            dsfls = dsfls[0]

        log.debug("Adding %s to %s" % (dsfls, key))
        info[key] = dsfls
        return info
Пример #30
0
    def validate_run(self, log, info, run_code, out):
        """Validate exit code and scan self.rfile for .fasta/.txt or .traml databases;
        only logs an error (does not raise) when none are found."""
        validation.check_exitcode(log, run_code)

        if info["DB_SOURCE"] == "BioDB":
            log.info("Database remains " + info["DBASE"])
            return info

        found = False
        with open(self.rfile) as f:
            for line in f.readlines():
                lowered = line.lower()
                # if info['DB_TYPE'].lower in line.lower():
                if ".fasta" in lowered or ".txt" in lowered:
                    info["DBASE"] = line.split()[1]
                    log.info("Database found " + info["DBASE"])
                    found = True
                if ".traml" in lowered:
                    info["TRAML"] = line.split()[1]
                    log.info("TraML found " + info["TRAML"])
                    found = True
        if not found:
            log.error("No matching database (.fasta or .traml) found in dataset!")

        return info
Пример #31
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate stdout and exit code, then the alignment matrix output file."""
     for check, value in ((validation.check_stdout, stdout),
                          (validation.check_exitcode, exit_code)):
         check(log, value)
     validation.check_file(log, info['ALIGNMENT_MATRIX'])
     return info
Пример #32
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate exit code and stdout, then the pepXML result file."""
     for check, value in ((validation.check_exitcode, exit_code),
                          (validation.check_stdout, stdout)):
         check(log, value)
     validation.check_xml(log, info[Keys.PEPXML])
     return info
Пример #33
0
 def validate_run(self, log, info, run_code, stdout):
     """Only the exit code is checked for this tool; stdout is not inspected."""
     validation.check_exitcode(log, run_code)
     return info
Пример #34
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate the exit code, then the protXML output file."""
     validation.check_exitcode(log, exit_code)
     protxml = info['PROTXML']
     validation.check_xml(log, protxml)
     return info
 def validate_run(self, log, info, run_code, out):
     """Validate exit code, the protein/peptide CSVs, and the consensusXML."""
     validation.check_exitcode(log, run_code)
     for key in ('PROTCSV', 'PEPCSV'):
         validation.check_file(log, info[key])
     validation.check_xml(log, info['CONSENSUSXML'])
     return info
Пример #36
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate stdout/exit code; check the requant TSV unless requant is disabled."""
     validation.check_stdout(log, stdout)
     validation.check_exitcode(log, exit_code)
     requant_enabled = info.get('DO_CHROMML_REQUANT', "") != "false"
     if requant_enabled:
         validation.check_file(log, info['REQUANT_TSV'])
     return info
Пример #37
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate stdout and exit code; the requant TSV is skipped when disabled."""
     validation.check_stdout(log, stdout)
     validation.check_exitcode(log, exit_code)
     if info.get('DO_CHROMML_REQUANT', "") != "false":
         # requant output only exists when requantification actually ran
         validation.check_file(log, info['REQUANT_TSV'])
     return info
Пример #38
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate the exit code and the Mayu output file."""
     validation.check_exitcode(log, exit_code)
     mayu_out = info['MAYUOUT']
     validation.check_file(log, mayu_out)
     return info
Пример #39
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate the exit code first, then stdout."""
     for check, value in ((validation.check_exitcode, exit_code),
                          (validation.check_stdout, stdout)):
         check(log, value)
     return info
Пример #40
0
 def validate_run(self, log, info, exit_code, stdout):
     """Run the generic exit-code and stdout checks; no output files to verify."""
     for check, value in ((validation.check_exitcode, exit_code),
                          (validation.check_stdout, stdout)):
         check(log, value)
     return info
Пример #41
0
 def validate_run(self, log, info, exit_code, stdout):
     """Check the exit code, then verify the Mayu output file exists."""
     validation.check_exitcode(log, exit_code)
     mayu_output = info['MAYUOUT']
     validation.check_file(log, mayu_output)
     return info
Пример #42
0
 def validate_run(self, log, info, run_code, out):
     """Validate exit code, both CSV outputs, and the consensusXML file."""
     validation.check_exitcode(log, run_code)
     for csv_key in ('PROTCSV', 'PEPCSV'):
         validation.check_file(log, info[csv_key])
     validation.check_xml(log, info['CONSENSUSXML'])
     return info
Пример #43
0
 def validate_run(self, log, info, exit_code, stdout):
     """Check the exit code, then validate the protXML output."""
     validation.check_exitcode(log, exit_code)
     prot_xml = info['PROTXML']
     validation.check_xml(log, prot_xml)
     return info
Пример #44
0
 def validate_run(self, log, info, exit_code, stdout):
     """Validate exit code then stdout, followed by the pepXML result file."""
     for check, value in ((validation.check_exitcode, exit_code),
                          (validation.check_stdout, stdout)):
         check(log, value)
     validation.check_xml(log, info[Keys.PEPXML])
     return info
Пример #45
0
 def validate_run(self, log, info, run_code, stdout):
     """Exit-code-only validation; this tool produces no checked output files."""
     validation.check_exitcode(log, run_code)
     return info
Пример #46
0
 def validate_run(self, log, info, exit_code, stdout):
     """Verify every dumped mzXML file exists, then check the exit code."""
     for dump_file in info['DUMP_MZXML']:
         validation.check_file(log, dump_file)
     validation.check_exitcode(log, exit_code)
     return info
Пример #47
0
 def validate_run(self, log, info, exit_code, stdout):
     """Each dumped mzXML must exist; the exit code is checked afterwards."""
     for mzxml_path in info['DUMP_MZXML']:
         validation.check_file(log, mzxml_path)
     validation.check_exitcode(log, exit_code)
     return info
Пример #48
0
 def validate_run(self, log, info, exit_code, stdout):
     """Run stdout and exit-code checks, then validate the alignment matrix file."""
     for check, value in ((validation.check_stdout, stdout),
                          (validation.check_exitcode, exit_code)):
         check(log, value)
     validation.check_file(log, info['ALIGNMENT_MATRIX'])
     return info