def _formatEdges(self, edges, weights): if len(edges) == 0: return '.' else: return ';'.join(formatPhraseWithCorrectChrUsage(edge, useUrlEncoding=True, notAllowedChars='#,;=\t') + \ ('=' + self._formatEdgeWeight(weights[i]) if weights is not None else '') \ for i,edge in enumerate(edges) )
def _composeDataLine(self, ge, hbColumns, dataLineCount, lastGE): cols = [] for hbColName in hbColumns: if hbColName == 'start': cols.append(self._formatStart(ge.start)) elif hbColName == 'end': cols.append(self._formatEnd(ge.end)) elif hbColName == 'strand': cols.append(getStringFromStrand(ge.strand)) elif hbColName == 'val': cols.append(self._formatValue(ge.val)) elif hbColName == 'edges': cols.append(self._formatEdges(ge.edges, ge.weights)) elif hbColName == 'weights': pass else: cols.append(formatPhraseWithCorrectChrUsage(unicode(getattr(ge, hbColName)), \ useUrlEncoding=True, notAllowedChars='#\t')) if self._headerDict['fixed-size data lines']: assert len(cols) == 1 return cols[0] + (os.linesep if (dataLineCount * len(cols[0])) % 60 < len(cols[0]) \ or lastGE else '') else: return '\t'.join(cols) + os.linesep
def _composeTrackLines(gSuite, colSpecs, attributes, out):
    """
    Print one tab-separated line to out for every track in gSuite.

    A track with a non-empty comment first gets a '#'-prefixed comment line.
    The track line holds one cell per entry in colSpecs (read from the track
    member named by colSpec.memberName), followed by one cell per name in
    attributes; attributes that the track does not have are written as '.'.
    """
    for track in gSuite.allTracks():
        if track.comment:
            print >> out, '#' + track.comment

        cells = []
        for colSpec in colSpecs:
            memberValue = getattr(track, colSpec.memberName)
            cells.append(formatPhraseWithCorrectChrUsage(memberValue))

        for attribute in attributes:
            if attribute not in track.attributes:
                cells.append('.')
                continue
            cells.append(formatPhraseWithCorrectChrUsage(track.attributes[attribute]))

        trackLine = '\t'.join(cells)
        print >> out, trackLine
def _composeBoundingRegionLine(self, boundingRegionTuple):
    """
    Compose the '####' bounding region line for boundingRegionTuple.region.

    Works on a copy of the region. When the header '1-indexed' is set, start
    and end are increased by one; when 'end inclusive' is set, end is
    decreased by one. Coordinates that are None are left as None and are not
    written. The remaining genome/chr/start/end values are written as
    '<name>=<value>' pairs separated by '; '.
    """
    region = boundingRegionTuple.region.getCopy()

    if self._headerDict['1-indexed']:
        region.start = None if region.start is None else region.start + 1
        region.end = None if region.end is None else region.end + 1
    if self._headerDict['end inclusive']:
        region.end = None if region.end is None else region.end - 1

    # First collect all (GTrack name, value) pairs, then format the defined ones
    namedValues = []
    for attr in ['genome', 'chr', 'start', 'end']:
        namedValues.append((Gtrack.convertNameToGtrack(attr), getattr(region, attr)))

    parts = []
    for gtrackName, value in namedValues:
        if value is None:
            continue
        parts.append(gtrackName + '=' + formatPhraseWithCorrectChrUsage(str(value), useUrlEncoding=True,
                                                                        notAllowedChars='=;#\t'))

    return '####' + '; '.join(parts) + os.linesep
def _commonFormatVal(self, val, valueType, valueDim):
    """
    Format a value (or a collection of values) as column text.

    valueType selects the entry in Gtrack.VAL_TYPE_DICT that provides the
    missing value and the delimiter. If valueDim is 'scalar', a single value
    is formatted: '.' for missing values, a formatted phrase for strings,
    '1'/'0' for booleans and str(val) for anything else. For any other
    valueDim, every part of val is formatted as a scalar and the results are
    joined with the delimiter of the value type; an empty val gives '.'.
    """
    valTypeInfo = Gtrack.VAL_TYPE_DICT[valueType]

    if valueDim != 'scalar':
        if len(val) == 0:
            return '.'
        return valTypeInfo.delim.join([self._commonFormatVal(part, valueType, 'scalar') for part in val])

    # Unwrap single-element containers (but not strings)
    if not isinstance(val, basestring) and hasattr(val, '__len__') and len(val) == 1:
        val = val[0]

    # NaN never compares equal to itself, hence the separate isNan test
    if (val == valTypeInfo.missingVal) or (isNan(val) and isNan(valTypeInfo.missingVal)):
        return '.'

    if isinstance(val, basestring):
        return formatPhraseWithCorrectChrUsage(val, useUrlEncoding=True, notAllowedChars='#.,;=\t')

    if isinstance(val, bool):
        return '1' if val else '0'

    return str(val)
def _composeHeaders(gSuite, out):
    """
    Print the header lines of gSuite to out, one '##<key>: <value>' line for
    each (key, value) pair returned by _getAllHeadersToPrint.
    """
    for key, rawValue in _getAllHeadersToPrint(gSuite):
        value = formatPhraseWithCorrectChrUsage(rawValue)
        headerLine = '##%s: %s' % (key, value)
        print >> out, headerLine
def _composeColSpecLine(self, columns):
    """
    Compose the '###' column specification line: the names in columns,
    formatted without url encoding ('#' and tab not allowed), separated by
    tabs and terminated with os.linesep.
    """
    colNames = []
    for col in columns:
        colNames.append(formatPhraseWithCorrectChrUsage(str(col), useUrlEncoding=False, notAllowedChars='#\t'))

    return '###' + '\t'.join(colNames) + os.linesep
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Is called when the execute-button is pushed by the web-user. Should print
    output as HTML to standard out, which will be directed to a results page
    in Galaxy history. If getOutputFormat is anything else than HTML, the
    output should be written to the file with path galaxyFn. If needed,
    StaticFile can be used to get a path where additional files can be put
    (e.g. generated image files).

    choices holds the selections made by the web-user in each options box.

    Here, the tabular input (a history element, or the text in choices.input)
    is rewritten into a temporary GTrack file, which is then expanded and
    composed to galaxyFn. When dense output or 3D (linked) data is requested,
    the input is read twice: a 'pre' pass that copies the lines to a
    temporary file which is then sorted by the external 'sort' program, and a
    'final' pass over the sorted lines that writes the data lines.

    Input errors are reported on standard error, after which the method
    returns. Any exception is printed to standard error and re-raised.
    '''
    try:
        if choices.history:
            inputFile = open(ExternalTrackManager.extractFnFromGalaxyTN(choices.history.split(':')), 'r')
        else:
            inputFile = StringIO(choices.input)

        headers = cls._getHeaders(choices)
        # Column name -> column index in the input file
        headerIdxs = {}
        for i, header in enumerate(headers):
            headerIdxs[header] = i

        createDense = choices.createDense == 'Yes'
        if createDense:
            firstRegInBlock = None
            curReg = None
            assert headerIdxs['seqid'] is not None
            assert headerIdxs['start'] is not None
            # Columns whose header is blanked are left out of the output
            # (see colIndexes below)
            headers[headerIdxs['start']] = ''

        create3dData = cls._create3dData(choices)
        if create3dData:
            if any(x in headers for x in ['id', 'edges']):
                print >> sys.stderr, "Error: when using the special 3D input columns 'linked_seqid' and " + \
                                     "'linked_start', the columns 'id' and 'edges' must not " + \
                                     "be specified in addition."
                return

            # The link columns are replaced by generated 'id' and 'edges'
            # columns, appended after the columns of the input file
            for header in ['linked_seqid', 'linked_start', 'linked_end', 'link_weight']:
                if header in headerIdxs:
                    headers[headerIdxs[header]] = ''
            for header in ['id', 'edges']:
                headerIdxs[header] = len(headers)
                headers += [header]

            regs = []           # all distinct regions, in order of first appearance (sorted after the 'pre' pass)
            regIdx = 0          # position in regs reached while filling in the edges of the current region
            prevRegIdx = 0
            idDict = {}         # region string -> id (filled with running numbers after the 'pre' pass)
            curCols = None      # output columns of the region currently being collected
            prevLine = ''
            firstRegInBlock = None
            curReg = None
            prev3dReg = None    # last region for which a data line has been written

        if createDense or create3dData:
            newInputFile = NamedTemporaryFile()
            sortedInputFile = NamedTemporaryFile()

        colIndexes = [i for i, header in enumerate(headers) if header != '']
        numSkipLines = cls._getNumSkipLines(choices)

        tempContents = NamedTemporaryFile()
        tempDataLines = NamedTemporaryFile()

        if choices.indexing == '1-indexed, end inclusive':
            tempContents.write('##1-indexed: true' + os.linesep)
            tempContents.write('##end inclusive: true' + os.linesep)

        tempContents.write('###' + '\t'.join([headers[i] for i in colIndexes]) + os.linesep)

        for passType in ['pre','final'] if createDense or create3dData else ['final']:
            for i in xrange(numSkipLines):
                inputFile.readline()

            splitChar = cls._getSplitChar(choices)
            # NOTE(review): numCols is not used below; the call is kept in
            # case it has side effects - confirm before removing
            numCols = cls._getFileContentsInfo(choices).numCols
            autoCorrectSeqId = choices.handleSeqId == 'Yes, auto-correct to the best match in the genome build'
            cropCrossingSegments = choices.cropCrossingSegments == 'Yes'
            genome = choices.genome

            for i, line in enumerate(inputFile):
                # Skip empty lines and comment lines
                if line == '' or len(line) > 0 and line[0] == '#':
                    continue

                cols = [x.strip() for x in line.strip().split(splitChar)]
                if create3dData:
                    # Placeholders for the generated 'id' and 'edges' columns
                    cols += ['', '']

                for j in colIndexes:
                    if len(cols) <= j:
                        print >> sys.stderr, "Error in line #%s: %s" % (i+1, line)
                        print >> sys.stderr, "The line does not include the column #%s, which is defined with " \
                                             "the name '%s' (the number of columns is %s). Please fix the input " \
                                             "file or redefine the column names of this column." \
                                             % (j+1, headers[j], len(cols))
                        return

                if autoCorrectSeqId:
                    from quick.util.GenomeInfo import GenomeInfo
                    cols[headerIdxs['seqid']] = GenomeInfo.findBestMatchingChr(genome, cols[headerIdxs['seqid']])

                # Empty cells are written as '.'
                for j, col in enumerate(cols):
                    if col == '':
                        cols[j] = '.'
                    else:
                        cols[j] = formatPhraseWithCorrectChrUsage(col, notAllowedChars='#\t')

                if cropCrossingSegments:
                    from quick.util.GenomeInfo import GenomeInfo
                    for seqidHdr, startHdr, endHdr in [('seqid','start','end')] \
                        + ([('linked_seqid','linked_start','linked_end')] if create3dData else []):
                        if endHdr in headerIdxs:
                            seqid = cols[headerIdxs[seqidHdr]]
                            start = cols[headerIdxs[startHdr]]
                            end = cols[headerIdxs[endHdr]]
                            if not any(x == '.' for x in [seqid, start, end]):
                                start, end = int(start), int(end)
                                if choices.indexing == '1-indexed, end inclusive':
                                    start -= 1

                                # Crop segments that start inside, but end
                                # after, the end of the sequence
                                chrLen = GenomeInfo().getChrLen(genome, seqid)
                                if start < chrLen and end > chrLen:
                                    cols[headerIdxs[endHdr]] = str(chrLen)

                if createDense or create3dData:
                    prevReg = curReg
                    # Test for presence of the 'end' column; a truthiness
                    # test on the index would miss an 'end' column at index 0
                    curReg = cls._getGenomeRegion(cols[headerIdxs['seqid']], cols[headerIdxs['start']], \
                                                  cols[headerIdxs['end']] if 'end' in headerIdxs else None)

                    if passType == 'pre':
                        newInputFile.write(line.strip() + os.linesep)

                        if create3dData:
                            id = curReg.strShort()
                            if id not in idDict:
                                regs.append(curReg)
                                idDict[id] = ''

                            linkedReg = cls._getGenomeRegion(cols[headerIdxs['linked_seqid']], cols[headerIdxs['linked_start']], \
                                                             cols[headerIdxs['linked_end']] if 'end' in headerIdxs else None)

                            if choices.undirected == 'Yes' and linkedReg and linkedReg != curReg:
                                id = linkedReg.strShort()
                                if id not in idDict:
                                    regs.append(linkedReg)
                                    idDict[id] = ''

                                # Also write the link in the opposite
                                # direction, by swapping the region and the
                                # linked region (without the two placeholder
                                # columns)
                                cols[headerIdxs['seqid']], cols[headerIdxs['linked_seqid']] = \
                                    cols[headerIdxs['linked_seqid']], cols[headerIdxs['seqid']]
                                cols[headerIdxs['start']], cols[headerIdxs['linked_start']] = \
                                    cols[headerIdxs['linked_start']], cols[headerIdxs['start']]
                                if 'end' in headerIdxs:
                                    cols[headerIdxs['end']], cols[headerIdxs['linked_end']] = \
                                        cols[headerIdxs['linked_end']], cols[headerIdxs['end']]
                                newInputFile.write(splitChar.join(cols[:-2]) + os.linesep)

                    else: #passType == 'final':
                        if firstRegInBlock is None:
                            firstRegInBlock = curReg

                        if create3dData:
                            if curReg != prevReg:
                                # A new region starts: keep the finished one
                                # in prevCols, to be written further down
                                prevCols = curCols
                                prevRegIdx = regIdx
                                regIdx = 0

                                id = curReg.strShort()
                                curCols = copy(cols)
                                curCols[headerIdxs['id']] = idDict[id] if choices.idGeneration == 'Counting' else id
                                curCols[headerIdxs['edges']] = ''

                            linkedReg = cls._getGenomeRegion(cols[headerIdxs['linked_seqid']], cols[headerIdxs['linked_start']], \
                                                             cols[headerIdxs['linked_end']] if 'end' in headerIdxs else None)

                            if linkedReg:
                                edges = curCols[headerIdxs['edges']]
                                if edges != '':
                                    edges += ';'

                                id = linkedReg.strShort()
                                if id not in idDict:
                                    raise InvalidFormatError("Error: linked region '%s' is not present in tabular file. Line: %s" % (linkedReg, line))

                                if choices.complete == 'Yes':
                                    # Add edges with missing weight ('.') for
                                    # the regions ahead of the linked region
                                    while regIdx < len(regs) and regs[regIdx] != linkedReg:
                                        missingId = regs[regIdx].strShort()
                                        edges += '%s=.;' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)
                                        regIdx += 1

                                edges += idDict[id] if choices.idGeneration == 'Counting' else id
                                if 'link_weight' in headerIdxs:
                                    edges += '=' + formatPhraseWithCorrectChrUsage(cols[headerIdxs['link_weight']], notAllowedChars='#\t')
                                regIdx += 1

                                curCols[headerIdxs['edges']] = edges

                            if curReg != prevReg and prevCols:
                                if choices.complete == 'Yes':
                                    # Add edges with missing weight for the
                                    # rest of the regions
                                    for i in xrange(prevRegIdx, len(regs)):
                                        missingId = regs[i].strShort()
                                        if i != 0:
                                            prevCols[headerIdxs['edges']] += ';'
                                        prevCols[headerIdxs['edges']] += '%s=.' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)

                                if prevCols[headerIdxs['edges']] == '':
                                    prevCols[headerIdxs['edges']] = '.'

                                cls._checkOverlap(prev3dReg, prevReg, prevLine)

                                if createDense:
                                    firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                        (firstRegInBlock, prev3dReg, prevReg, tempContents, tempDataLines)

                                cls._writeDataLines(prevCols, colIndexes, tempDataLines)
                                prev3dReg = prevReg
                                prevLine = line

                        else: #createDense
                            cls._checkOverlap(prevReg, curReg, line)
                            firstRegInBlock, tempDataLines = cls._writeBlockLines \
                                (firstRegInBlock, prevReg, curReg, tempContents, tempDataLines)
                            cls._writeDataLines(cols, colIndexes, tempDataLines)

                else:
                    cls._writeDataLines(cols, colIndexes, tempDataLines)

            if passType == 'pre':
                newInputFile.flush()
                inputFile.close()

                # Stable sort on the region columns that are present. The
                # command is passed as an argument list, without a shell, so
                # that the separator character is never interpreted by a
                # shell.
                sortKeys = [('seqid',''), ('start','n'), ('end','n'), \
                            ('linked_seqid',''), ('linked_start','n'), ('linked_end','n')]
                sortCmd = ["sort", newInputFile.name, "-t%s" % splitChar, "-s"] + \
                          ["-k%s,%s%s" % (headerIdxs[x]+1, headerIdxs[x]+1, s) \
                           for x,s in sortKeys if x in headerIdxs] + \
                          ["-o", sortedInputFile.name]
                subprocess.call(sortCmd, stderr=sys.stderr, stdout=sys.stdout)

                newInputFile.close()

                if create3dData:
                    # Ids generated by counting follow the sorted order of
                    # the regions
                    regs = sorted(regs)
                    for i,reg in enumerate(regs):
                        idDict[reg.strShort()] = str(i)

                # The final pass reads the sorted lines from the start; the
                # lines to skip were not copied in the 'pre' pass
                inputFile = sortedInputFile
                inputFile.seek(0)
                numSkipLines = 0
                curReg = None

            else: #passType == 'final':
                if create3dData:
                    # Write the last region, which is still held in curCols
                    if choices.complete == 'Yes':
                        for i in xrange(regIdx, len(regs)):
                            missingId = regs[i].strShort()
                            if i != 0:
                                curCols[headerIdxs['edges']] += ';'
                            curCols[headerIdxs['edges']] += '%s=.' % (idDict[missingId] if choices.idGeneration == 'Counting' else missingId)

                    if curCols[headerIdxs['edges']] == '':
                        curCols[headerIdxs['edges']] = '.'

                    cls._checkOverlap(prev3dReg, curReg, prevLine)

                    if createDense:
                        firstRegInBlock, tempDataLines = cls._writeBlockLines \
                            (firstRegInBlock, prev3dReg, curReg, tempContents, tempDataLines)

                    cls._writeDataLines(curCols, colIndexes, tempDataLines)

                if createDense:
                    firstRegInBlock, tempDataLines = cls._writeBlockLines \
                        (firstRegInBlock, curReg, None, tempContents, tempDataLines)

        # Append the data lines after the header lines
        tempDataLines.flush()
        tempDataLines.seek(0)
        tempContents.write(tempDataLines.read())
        tempContents.flush()
        tempContents.seek(0)

        expandHeadersOfGtrackFileAndReturnComposer(tempContents.name).composeToFile(galaxyFn)

        # Read through the whole composed file
        # NOTE(review): presumably done to have the result validated by the
        # GTrack parser - confirm
        geSource = GtrackGenomeElementSource(galaxyFn, genome=genome, printWarnings=False)
        for ge in geSource:
            pass

    except Exception as e:
        print >> sys.stderr, e
        raise