def evalConfigFunc(configPyFile, getFuncName="getConfig", getFuncArgs=None, getFuncKwargs=None, extraEnv=None):
    """Evaluate the specified configuration file and call the specified
    function (defaulting to getConfig()) defined in the file.  The value
    returned by that function is returned.  This is useful for config files
    that need to construct complex objects.

    Arguments:
      configPyFile - configuration file name.  It is always passed as the
          first positional argument of the configuration function and is
          also set in the module globals before evaluation.
      getFuncName - name of the function to look up in the evaluated file.
      getFuncArgs - optional sequence of extra positional arguments, added
          after configPyFile.
      getFuncKwargs - optional dict of keyword arguments.
      extraEnv - if specified, dict whose contents are passed as module
          globals.

    Raises PycbioException if the name is not defined, is not a plain
    Python function, or if the function itself raises (the original
    exception is chained as the cause).
    """
    configEnv = _evalConfigFile(configPyFile, configEnv=dict(), extraEnv=extraEnv)
    configFunc = configEnv.get(getFuncName)
    if configFunc is None:
        raise PycbioException(
            "configuration script does not define function {}(): {} ".format(
                getFuncName, configPyFile))
    if not isinstance(configFunc, FunctionType):
        raise PycbioException(
            "configuration script defines {}, however it is not a function: {}"
            .format(getFuncName, configPyFile))
    # None sentinels are used instead of mutable defaults; a fresh list/dict
    # is built for each call.
    funcArgs = [configPyFile] + list(getFuncArgs if getFuncArgs is not None else ())
    funcKwargs = getFuncKwargs if getFuncKwargs is not None else {}
    try:
        return configFunc(*funcArgs, **funcKwargs)
    except Exception as ex:
        # FIXME really need traceback here
        raise PycbioException(
            "Error from configuration function {}(): {}".format(
                getFuncName, configPyFile)) from ex
def __init__(self, procDesc, returncode=None, stderr=None, cause=None):
    """Build the exception message describing how a process failed.

    Arguments:
      procDesc - description of the process, appended to the message when
          not None.
      returncode - None produces "exec failed"; a negative value is
          reported as a signal, named via _getSigName(-returncode); any
          other value is reported as an exit code.
      stderr - text appended on a new line when not None and not empty.
      cause - passed through to PycbioException as the cause.

    returncode and stderr are also saved on the instance.
    """
    self.returncode = returncode
    self.stderr = stderr
    # identity tests: None is a singleton and `==` can be overridden
    if returncode is None:
        msg = "exec failed"
    elif returncode < 0:
        msg = "process signaled: " + _getSigName(-returncode)
    else:
        msg = "process exited " + str(returncode)
    if procDesc is not None:
        msg += ": " + procDesc
    if (stderr is not None) and (len(stderr) != 0):
        msg += ":\n" + stderr
    PycbioException.__init__(self, msg, cause=cause)
def __init__(self, procDesc, returncode=None, stderr=None, cause=None):
    """Record the process outcome and compose the error message.

    The message starts with the outcome ("exec failed" when returncode is
    None, a signal name from _getSigName() when it is negative, otherwise
    the exit code), followed by procDesc when given, followed by stderr on
    a new line when it is given and non-empty.  cause is handed to
    PycbioException.
    """
    self.returncode = returncode
    self.stderr = stderr

    if returncode is None:
        outcome = "exec failed"
    elif returncode < 0:
        outcome = "process signaled: " + _getSigName(-returncode)
    else:
        outcome = "process exited " + str(returncode)

    descPart = "" if procDesc is None else ": " + procDesc
    haveStderr = (stderr is not None) and (len(stderr) != 0)
    stderrPart = ":\n" + stderr if haveStderr else ""

    PycbioException.__init__(self, outcome + descPart + stderrPart, cause=cause)
def _calcParams(self):
    """Calculate binning parameters.

    Calls self.data.compute(), then derives the *Use attributes
    (binMinUse, binMaxUse, numBinsUse, binSizeUse, binFloorUse, binCeilUse)
    from the configured binMin, binMax, numBins and binSize, falling back
    to self.data.min / self.data.max when a bound is not configured.

    Raises PycbioException when a bin size is in effect while a minimum is
    available; that code path is explicitly disabled below.
    """
    self.data.compute()
    self.binMinUse = self.binMin
    if self.binMinUse is None:
        self.binMinUse = self.data.min
    self.binMaxUse = self.binMax
    if self.binMaxUse is None:
        self.binMaxUse = self.data.max
    self.numBinsUse = self.numBins
    self.binSizeUse = self.binSize
    if (self.numBinsUse is None) and (self.binSizeUse is None):
        # default num bins and compute bin size from it below
        self.numBinsUse = 10
    if self.binMinUse is None:
        # still no minimum after falling back to self.data.min
        # (presumably means there is no data -- TODO confirm)
        self.binSizeUse = self.binFloorUse = self.binCeilUse = 0
    elif self.binSizeUse is None:
        # compute bin size from num bins
        # NOTE(review): numBinsUse == 1 makes the divisor below zero
        estBinSize = (self.binMaxUse - self.binMinUse) / (self.numBinsUse - 1)
        self.binSizeUse = (self.binMaxUse - self.binMinUse + estBinSize) / self.numBinsUse
        # floor is placed half a bin below the minimum
        self.binFloorUse = self.binMinUse - (self.binSizeUse / 2.0)
        self.binCeilUse = self.binFloorUse + (self.numBinsUse * self.binSizeUse)
    else:
        # compute num bins from bin size
        # NOTE(review): this branch is disabled by the raise; the three
        # statements after it are unreachable.
        raise PycbioException("doesn't work")
        self.numBinsUse = (self.binMaxUse - self.binMinUse) // self.binSizeUse
        self.binFloorUse = self.binMinUse
        self.binCeilUse = self.binMaxUse
def __skipToSeqTable(self, fh):
    """skip past header line before sequence records

    Reads lines from fh until one equals self.expectedHeader, leaving fh
    positioned after that line.  Raises PycbioException, naming fh.name,
    if no line matches.
    """
    for line in fh:
        # Strip only an actual line terminator.  Unconditionally dropping
        # the last character would truncate a final line that has no
        # trailing newline.
        if line.endswith("\n"):
            line = line[:-1]
        if line == self.expectedHeader:
            return
    raise PycbioException("expected assembly report header not found in " + fh.name)
def _parseLine(self, line):
    """Parse one `name=value' line and store the pair in this mapping.

    Blank lines and lines starting with `#' (after stripping whitespace)
    are ignored.  The split is on the first `='; name and value are both
    stripped.  Raises PycbioException if a non-comment line has no `='.
    """
    text = line.strip()
    if (len(text) == 0) or text.startswith("#"):
        return
    name, sep, value = text.partition("=")
    if not sep:
        raise PycbioException("expected name=value, got: " + text)
    self[name.strip()] = value.strip()
def getByName(self, name):
    """Return the entry stored under name via self.get(); raise
    PycbioException naming self.table if the lookup yields None."""
    found = self.get(name)
    if found is not None:
        return found
    raise PycbioException("can't find sequence {} in table {}".format(
        name, self.table))
def getrByTranscriptId(self, transcriptId):
    """get the required GencodeAttrs object for transcriptId, or error if
    not found.

    Delegates to self.getByTranscriptId() and raises PycbioException,
    naming self.table, when that returns None.
    """
    attrs = self.getByTranscriptId(transcriptId)
    if attrs is None:
        # the exception must be raised, not merely constructed; otherwise
        # None is returned silently
        raise PycbioException("transcriptId {} not found in {}".format(
            transcriptId, self.table))
    return attrs
def getrByGeneId(self, geneId):
    """get required GencodeAttrs objects for geneId or error if not found

    Delegates to self.getByGeneId() and raises PycbioException, naming
    self.table, when the result is empty.
    """
    attrses = self.getByGeneId(geneId)
    if len(attrses) == 0:
        # the exception must be raised, not merely constructed; otherwise
        # the empty result is returned silently
        raise PycbioException("geneId {} not found in {}".format(
            geneId, self.table))
    return attrses
def __init__(self, lines):
    """parse file given output lines of para check

    All counters start at zero.  Each non-empty line is stripped, split on
    `:' and handed to self._parseLine(); PycbioException is raised for a
    line with no `:' or one that _parseLine() does not accept.
    """
    counterNames = (
        # simple parse
        "unsubmitted", "subErrors", "queueErrors", "trackingErrors",
        "waiting", "crashed", "running", "ranOk", "totalJobs",
        # special cases
        "paraResultsErrors", "slow", "hung", "failed",
    )
    for counterName in counterNames:
        setattr(self, counterName, 0)

    # parse lines, skipping empty lines
    for rawLine in lines:
        text = rawLine.strip()
        if len(text) == 0:
            continue
        words = text.split(":")
        if (len(words) >= 2) and self._parseLine(words):
            continue
        raise PycbioException("don't know how to parse para check output line: {}".format(text))
def _parseCds(self, line):
    """Parse a CDS line with self._parseRe and store, keyed by group 1, a
    tuple of groups 2 and 3 converted to int with one subtracted from
    each.  Raises PycbioException if the line does not match."""
    match = self._parseRe.match(line)
    if match is None:
        raise PycbioException("can't parse CDS line: " + line)
    key, startText, endText = match.group(1, 2, 3)
    self[key] = (int(startText) - 1, int(endText) - 1)
def getSubset(self, wantSet):
    """Find the stored subset object equal to wantSet.

    self.subsets is built on first use from self._makeSubsets(self.elements).
    Returns the first stored object that compares equal to wantSet (not
    wantSet itself); raises PycbioException if there is none.
    """
    if self.subsets is None:
        self.subsets = self._makeSubsets(self.elements)
    # private sentinel so any stored value, even None, can be returned
    notFound = object()
    match = next((cand for cand in self.subsets if cand == wantSet), notFound)
    if match is notFound:
        raise PycbioException("not a valid subset: " + str(wantSet))
    return match
def getSubseq(self, seq):
    """Return self.qSubSeqs when seq equals self.qSeq, self.tSubSeqs when
    it equals self.tSeq; raise PycbioException for any other seq."""
    if seq == self.qSeq:
        return self.qSubSeqs
    if seq == self.tSeq:
        return self.tSubSeqs
    raise PycbioException("seq is not part of this alignment")
def parse(cls, row, numStdCols=None):
    """Parse bed string columns into a bed object.

    If numStdCols is specified, only that many columns are parsed as
    standard BED columns and the remainder goes to extraCols.  When it is
    None, min(len(row), 12) is used.

    Columns beyond the first three are filled in only when numStdCols
    reaches them; thickStart and thickEnd need more than 7 columns,
    itemRgb more than 8 and blocks more than 11.  Anything not reached is
    passed to cls as None.  Raises PycbioException if row has fewer than
    numStdCols columns.
    """
    assert ((numStdCols is None) or (3 <= numStdCols <= 12))
    numCols = len(row)
    if numStdCols is None:
        numStdCols = min(numCols, 12)
    if numCols < numStdCols:
        raise PycbioException(
            "expected at least {} columns, found {}: ".format(
                numStdCols, numCols))

    chrom = row[0]
    chromStart = int(row[1])
    chromEnd = int(row[2])
    name = row[3] if numStdCols > 3 else None
    score = int(row[4]) if numStdCols > 4 else None
    strand = row[5] if numStdCols > 5 else None
    if numStdCols > 7:
        thickStart, thickEnd = int(row[6]), int(row[7])
    else:
        thickStart = thickEnd = None
    itemRgb = row[8] if numStdCols > 8 else None
    blocks = Bed._parseBlockColumns(chromStart, row) if numStdCols > 11 else None
    extraCols = row[numStdCols:] if numCols > numStdCols else None

    return cls(chrom, chromStart, chromEnd, name=name, score=score, strand=strand,
               thickStart=thickStart, thickEnd=thickEnd, itemRgb=itemRgb,
               blocks=blocks, extraCols=extraCols, numStdCols=numStdCols)
def reverseStrand(strand):
    """Return the opposite strand: '+' for '-', '-' for '+', and None for
    None.  Any other value raises PycbioException."""
    if strand is None:
        return None
    if strand == '+':
        return '-'
    if strand == '-':
        return '+'
    raise PycbioException("invalid strand '{}'".format(strand))
def compressCmd(path, default="cat"):
    """Return the command used to compress path, chosen by its extension:
    `gzip' for .gz and `bzip2' for .bz2.  Other paths get default, which
    is the `cat' command unless overridden, so data is just written
    through.  Writing .Z files raises PycbioException."""
    if path.endswith(".Z"):
        raise PycbioException("writing compress .Z files not supported")
    for ext, cmd in ((".gz", "gzip"), (".bz2", "bzip2")):
        if path.endswith(ext):
            return cmd
    return default
def setup(self, opts): """initializing profiling, if requested""" if opts.profile is None: if opts.signal is not None: raise PycbioException("can't specify --profile-signal without --profile") else: if opts.signal is not None: self._setupSignalHandler(opts.signal) self.logFile = opts.profile self.profiler = cProfile.Profile() self.profiler.enable()
def fromPhase(phase):
    """Construct a Frame from a GFF/GTF like phase, which may be an int
    or str.  Phase 0 gives Frame(0), phase 1 gives Frame(2) and phase 2
    gives Frame(1); any other value raises PycbioException."""
    if isinstance(phase, str):
        phase = int(phase)
    for phaseVal, frameVal in ((0, 0), (1, 2), (2, 1)):
        if phase == phaseVal:
            return Frame(frameVal)
    raise PycbioException("invalid phase: {}".format(phase))
def findTmpDir(tmpDir=None):
    """Find the temporary directory to use.

    Preference order: the tmpDir argument when not None, then the TMPDIR
    environment variable, then the first existing directory of /data/tmp,
    /scratch/tmp, /var/tmp and /tmp.  Raises PycbioException if none is
    found.
    """
    if tmpDir is not None:
        return tmpDir
    envTmpDir = os.getenv("TMPDIR")
    if envTmpDir is not None:
        return envTmpDir
    # UCSC special checks
    candidates = ("/data/tmp", "/scratch/tmp", "/var/tmp", "/tmp")
    existing = next((cand for cand in candidates if os.path.exists(cand)), None)
    if existing is None:
        raise PycbioException("can't find a tmp directory")
    return existing
def add(self, seqId, start, end, value, strand=None):
    """Add an entry for a sequence and range, with an optional strand.

    The strand is validated with self._checkStrand().  The first call
    fixes whether entries carry a strand (self.haveStrand); later calls
    that disagree raise PycbioException.  Entries are kept in a RangeBins
    per (seqId, strand) key in self.seqBins, created on first use.
    """
    self._checkStrand(strand)
    stranded = strand is not None
    if self.haveStrand is None:
        self.haveStrand = stranded
    elif self.haveStrand != stranded:
        raise PycbioException("all RangeFinder entries must either have strand or not have strand")

    binKey = (seqId, strand)
    seqRangeBins = self.seqBins.get(binKey)
    if seqRangeBins is None:
        seqRangeBins = RangeBins(seqId, strand)
        self.seqBins[binKey] = seqRangeBins
    seqRangeBins.add(start, end, value)
def _evalConfigFile(configPyFile, configEnv, extraEnv=None):
    """Evaluate a configuration file and return its environment.

    configEnv is seeded with the absolute path of the file (under the
    configPyFileVar key) and with include_config under its own name, then
    with the contents of extraEnv when given.  The file is then run with
    exec() using configEnv as both globals and locals.  Any exception from
    reading or executing the file is wrapped in a PycbioException, with
    the original chained as the cause.

    NOTE: the file is executed as Python code, so it must be trusted.
    """
    configEnv[configPyFileVar] = os.path.abspath(configPyFile)
    configEnv[include_config.__name__] = include_config
    if extraEnv is not None:
        configEnv.update(extraEnv)
    try:
        with open(configPyFile) as configFh:
            source = configFh.read()
            exec(source, configEnv, configEnv)
    except Exception as ex:
        errMsg = "Error evaluating configuration file: {}".format(configPyFile)
        raise PycbioException(errMsg) from ex
    return configEnv
def __init__(self, fileName, buildIdx=False, buildUniqIdx=False, buildRangeIdx=False):
    """Load all rows produced by GenePredReader(fileName) into this
    container and build the requested indexes.

    buildIdx and buildUniqIdx may not both be specified (PycbioException).
    self.names and self.rangeMap start as None and are populated only by
    whichever index builders are requested.
    """
    if buildIdx and buildUniqIdx:
        raise PycbioException("can't specify both buildIdx and buildUniqIdx")
    for gene in GenePredReader(fileName):
        self.append(gene)

    self.names = None
    self.rangeMap = None

    if buildUniqIdx:
        self._buildUniqIdx()
    if buildIdx:
        self._buildIdx()
    if buildRangeIdx:
        self._buildRangeIdx()
def opengz(fileName, mode="r", buffering=-1, encoding=None, errors=None):
    """Open a file; if it ends in an extension indicating compression,
    open it through a compression or decompression pipe.

    Uncompressed files are opened with the builtin open().  Compressed
    files are opened with pipettor.Popen, running decompressCmd() for
    modes starting with `r' and compressCmd() for modes starting with
    `w'.  Any other mode on a compressed file raises PycbioException.
    """
    ioOpts = dict(buffering=buffering, encoding=encoding, errors=errors)
    if not isCompressed(fileName):
        return open(fileName, mode, **ioOpts)
    if mode.startswith("r"):
        readCmd = decompressCmd(fileName)
        return pipettor.Popen([readCmd, fileName], mode=mode, **ioOpts)
    if mode.startswith("w"):
        writeCmd = compressCmd(fileName)
        return pipettor.Popen([writeCmd], mode=mode, stdout=fileName, **ioOpts)
    raise PycbioException("mode {} not support with compression for {}".format(mode, fileName))
def _fifoMk(suffix="tmp", tmpDir=None):
    """Create a FIFO with a unique name in a tmp directory and return its
    path.

    The directory is tmpDir, or $TMPDIR, or /var/tmp.  Names have the
    form <dir>/<hostname>.<pid>.<n>.<suffix>; up to 1000 values of <n> are
    tried with _NamedFifo._fifoMkAtomic() before PycbioException is
    raised.
    """
    # FIXME: don't need suffix/tmpDir, unless this made of part the Fifo API
    if tmpDir is None:
        tmpDir = os.getenv("TMPDIR", "/var/tmp")
    prefix = "{}/{}.{}".format(tmpDir, socket.gethostname(), os.getpid())
    maxTries = 1000
    for attempt in range(maxTries):
        candidate = "{}.{}.{}".format(prefix, attempt, suffix)
        if _NamedFifo._fifoMkAtomic(candidate):
            return candidate
    raise PycbioException("unable to create a unique FIFO name in the form \"{}.*.{} after {} tries".format(prefix, suffix, maxTries))
def __parseRecord(self, fh, line):
    """Parse one tab-separated record line and add it to the tables.

    The line must have exactly 10 columns, otherwise PycbioException is
    raised (fh is used only to name the file in that message).  The ninth
    column is converted to int.  The record is appended to self.seqs,
    indexed by sequenceName, and indexed by genBankAccn, refSeqAccn and
    ucscStyleName when those attributes are not None.
    """
    row = line.split('\t')
    if len(row) != 10:
        raise PycbioException(
            "expected 10 columns in assemble report record, found "
            + str(len(row)) + " in " + fh.name)
    rec = self.Record(row[0], row[1], row[2], row[3], row[4], row[5],
                      row[6], row[7], int(row[8]), row[9])
    self.seqs.append(rec)
    self.bySequenceName[rec.sequenceName] = rec
    # identity tests against None rather than `!=`
    if rec.genBankAccn is not None:
        self.byGenBankAccn[rec.genBankAccn] = rec
    if rec.refSeqAccn is not None:
        self.byRefSeqAccn[rec.refSeqAccn] = rec
    if rec.ucscStyleName is not None:
        self.byUcscStyleName[rec.ucscStyleName] = rec
def __init__(self, paraHost, runDir, paraDir, jobFile=None, cpu=None, mem=None, maxJobs=None, retries=None):
    """Set up a parasol run.

    paraDir should be relative to runDir or absolute; jobFile should be
    relative to runDir or absolute.  The para directory is created if
    needed (fileOps.ensureDir) and, when jobFile is given, it must already
    exist or PycbioException is raised.

    NOTE(review): the original note here said "will chdir to run dir";
    no chdir is done in this constructor.  Also, paraDir is passed through
    os.path.realpath() before being combined with runDir -- confirm that
    a runDir-relative paraDir is handled as intended.
    """
    self.paraHost = paraHost
    # symlinks can confuse parasol, as it can give two different names for a job.
    self.runDir = os.path.realpath(os.path.abspath(runDir))
    self.paraDir = os.path.realpath(paraDir)
    self.jobFile = jobFile
    self.cpu, self.mem = cpu, mem
    self.maxJobs, self.retries = maxJobs, retries

    absParaDir = self._mkAbs(self.runDir, self.paraDir)
    fileOps.ensureDir(absParaDir)
    if jobFile is None:
        return
    absJobFile = self._mkAbs(self.runDir, self.jobFile)
    if not os.path.exists(absJobFile):
        raise PycbioException("job file not found: {}".format(absJobFile))
def _checkStrand(self, strand):
    """Validate strand: None, "+" and "-" are accepted, anything else
    raises PycbioException."""
    validStrands = (None, "+", "-")
    if strand in validStrands:
        return
    raise PycbioException("invalid strand: {}".format(strand))
def __init__(self):
    """Initialize the exception with the fixed message "task terminated",
    delegating to PycbioException.__init__."""
    PycbioException.__init__(self, "task terminated")
def __init__(self, msg, cause=None):
    """Initialize the exception with msg and an optional cause; both are
    passed unchanged (positionally) to PycbioException.__init__."""
    PycbioException.__init__(self, msg, cause)
def __init__(self, msg, reader=None, cause=None):
    """Initialize the exception, optionally locating the error in a file.

    When reader is given, msg is prefixed with
    "<reader.fileName>:<reader.lineNum>: ".  msg and cause are then passed
    to PycbioException.__init__.
    """
    # identity test: None is a singleton and `!=` can be overridden
    if reader is not None:
        msg = str(reader.fileName) + ":" + str(reader.lineNum) + ": " + msg
    PycbioException.__init__(self, msg, cause)
def __reportExprError(self, ex):
    """Write the exception ex, formatted by PycbioException.formatExcept(),
    through self.verb.prall(), preceded by a line from strOps.dup(80, "=")
    and followed by a line from strOps.dup(80, "-")."""
    emit = self.verb.prall
    emit(strOps.dup(80, "=") + "\n")
    emit(PycbioException.formatExcept(ex) + "\n")
    emit(strOps.dup(80, "-") + "\n")
def addUniq(d, k, v):
    """Store v under key k in dict d; raise PycbioException instead of
    overwriting if k is already present."""
    if k not in d:
        d[k] = v
        return
    raise PycbioException("item \"{}\" already in dict".format(str(k)))
def _buildUniqIdx(self):
    """Build self.names, a dict mapping each row's name to the row.
    Raises PycbioException on the first name seen twice; self.names then
    holds the rows indexed up to that point."""
    index = self.names = dict()
    for gene in self:
        geneName = gene.name
        if geneName in index:
            raise PycbioException("gene with this name already in index: " + geneName)
        index[geneName] = gene