def findOverrepresentedTFsFromGeneSet(genome, tfSource, ensembleGeneIdList, upFlankSize, downFlankSize, geneSource, galaxyFn):
    """Run the 'categories differentially located in targets' analysis for a gene set.

    The TF track selected by ``tfSource`` is analysed against a combined
    target/control track, where the targets are the gene regions of
    ``ensembleGeneIdList`` and the controls are the standard gene regions of
    the genome. Results are printed by ``GalaxyInterface.runManual``; nothing
    is returned.

    Parameters:
        genome: genome build; only 'hg18' is accepted.
        tfSource: key into ``TfInfo.getTfTrackNameMappings(genome)``.
        ensembleGeneIdList: gene ids handed to
            ``GalaxyInterface.getGeneTrackFromGeneList``.
        upFlankSize, downFlankSize: must both be 0 (flanks are not
            implemented yet).
        geneSource: only 'Ensembl' is accepted.
        galaxyFn: Galaxy dataset file name; the Galaxy id extracted from it
            determines the working directory and the external track names.

    Raises:
        AssertionError: if ``genome``, ``geneSource`` or the flank sizes are
            unsupported. These checks run before any other work is done, so
            rejected input never leaves partially written files behind.
    """
    # Fail fast: check every precondition before touching disk or tracks.
    assert genome == 'hg18', 'Unsupported genome: %r' % (genome,)
    assert geneSource == 'Ensembl', 'Unsupported gene source: %r' % (geneSource,)
    # Should instead extend regions to include flanks
    assert upFlankSize == downFlankSize == 0, \
        'Flanks are not supported: %r, %r' % (upFlankSize, downFlankSize)

    # NOTE(review): assumes extractIdFromGalaxyFn has no side effects, so its
    # result can be reused instead of calling it a second time - confirm.
    galaxyId = extractIdFromGalaxyFn(galaxyFn)
    uniqueWebPath = getUniqueWebPath(galaxyId)

    tfTrackName = TfInfo.getTfTrackNameMappings(genome)[tfSource]

    # Get gene track
    targetGeneRegsTempFn = os.path.join(uniqueWebPath, 'geneRegs.bed')
    geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
    geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
    GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName,
                                             ensembleGeneIdList, targetGeneRegsTempFn)

    tcGeneRegsTempFn = os.path.join(uniqueWebPath, 'tcGeneRegs.targetcontrol.bedgraph')
    # Think this will be okay, subtraction not necessary as targets are put first:
    controlGeneRegsTempFn = geneRegsFn
    GalaxyInterface.combineToTargetControl(targetGeneRegsTempFn, controlGeneRegsTempFn,
                                           tcGeneRegsTempFn)

    tcGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc')
    targetGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc', '1')
    controlGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc', '0')

    # Pre-process the combined, target and control tracks.
    # Single-argument print(...) emits the same text as the former print
    # statements under Python 2 and is also valid Python 3 syntax.
    print('Pre-processing file: %s, with trackname: %s ' % (tcGeneRegsTempFn, tcGeneRegsExternalTN))
    ExternalTrackManager.preProcess(tcGeneRegsTempFn, tcGeneRegsExternalTN,
                                    'targetcontrol.bedgraph', genome)
    print('%s %s' % ('Pre-processing TN: ', targetGeneRegsExternalTN))
    ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed', genome)
    print('%s %s' % ('Pre-processing TN: ', controlGeneRegsExternalTN))
    ExternalTrackManager.preProcess(controlGeneRegsTempFn, controlGeneRegsExternalTN, 'bed', genome)

    analysisDef = ('Categories differentially located in targets?: Which categories of track1-points fall more inside case than control track2-segments? [rawStatistic:=PointCountInsideSegsStat:]'
                   '[tf1:=SegmentToStartPointFormatConverter:] [tf2:=TrivialFormatConverter:]'
                   '-> DivergentRowsInCategoryMatrixStat')
    regSpec, binSpec = '*', '*'
    GalaxyInterface.runManual([tfTrackName, tcGeneRegsExternalTN], analysisDef, regSpec, binSpec,
                              genome, printResults=True, printHtmlWarningMsgs=False)
def getExternalTrackName(self):
    """Return the standard external track name derived from ``self._id``.

    Every element of ``self._id`` except the last is passed as the first
    argument, and the last element as the second argument, to
    ``ExternalTrackManager.createStdTrackName``.
    """
    # Function-scope import, kept as in the original.
    from quick.application.ExternalTrackManager import ExternalTrackManager

    idPrefix, lastIdPart = self._id[:-1], self._id[-1]
    return ExternalTrackManager.createStdTrackName(idPrefix, lastIdPart)
def findOverrepresentedTFsFromGeneSet(genome, tfSource, ensembleGeneIdList, upFlankSize, downFlankSize, geneSource, galaxyFn):
    """Run the 'categories differentially located in targets' analysis for a gene set.

    The TF track selected by ``tfSource`` is analysed against a combined
    target/control track, where the targets are the gene regions of
    ``ensembleGeneIdList`` and the controls are the standard gene regions of
    the genome. Results are printed by ``GalaxyInterface.runManual``; nothing
    is returned.

    Parameters:
        genome: genome build; only 'hg18' is accepted.
        tfSource: key into ``TfInfo.getTfTrackNameMappings(genome)``.
        ensembleGeneIdList: gene ids handed to
            ``GalaxyInterface.getGeneTrackFromGeneList``.
        upFlankSize, downFlankSize: must both be 0 (flanks are not
            implemented yet).
        geneSource: only 'Ensembl' is accepted.
        galaxyFn: Galaxy dataset file name; it determines the run-specific
            working directory and the Galaxy id used in the external track
            names.

    Raises:
        AssertionError: if ``genome``, ``geneSource`` or the flank sizes are
            unsupported. These checks run before any other work is done, so
            rejected input never leaves partially written files behind.
    """
    # Fail fast: check every precondition before touching disk or tracks.
    assert genome == 'hg18', 'Unsupported genome: %r' % (genome,)
    assert geneSource == 'Ensembl', 'Unsupported gene source: %r' % (geneSource,)
    # Should instead extend regions to include flanks
    assert upFlankSize == downFlankSize == 0, \
        'Flanks are not supported: %r, %r' % (upFlankSize, downFlankSize)

    galaxyId = extractIdFromGalaxyFn(galaxyFn)
    uniqueWebPath = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()

    tfTrackName = TfInfo.getTfTrackNameMappings(genome)[tfSource]

    # Get gene track
    targetGeneRegsTempFn = os.path.join(uniqueWebPath, 'geneRegs.bed')
    geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
    geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
    GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName,
                                             ensembleGeneIdList, targetGeneRegsTempFn)

    tcGeneRegsTempFn = os.path.join(uniqueWebPath, 'tcGeneRegs.targetcontrol.bedgraph')
    # Think this will be okay, subtraction not necessary as targets are put first:
    controlGeneRegsTempFn = geneRegsFn
    GalaxyInterface.combineToTargetControl(targetGeneRegsTempFn, controlGeneRegsTempFn,
                                           tcGeneRegsTempFn)

    tcGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId + ['tempTc'])
    targetGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId + ['tempTc', '1'])
    controlGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId + ['tempTc', '0'])

    # Pre-process the combined, target and control tracks.
    # Single-argument print(...) emits the same text as the former print
    # statements under Python 2 and is also valid Python 3 syntax.
    print('Pre-processing file: %s, with trackname: %s ' % (tcGeneRegsTempFn, tcGeneRegsExternalTN))
    ExternalTrackManager.preProcess(tcGeneRegsTempFn, tcGeneRegsExternalTN,
                                    'targetcontrol.bedgraph', genome)
    print('%s %s' % ('Pre-processing TN: ', targetGeneRegsExternalTN))
    ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed', genome)
    print('%s %s' % ('Pre-processing TN: ', controlGeneRegsExternalTN))
    ExternalTrackManager.preProcess(controlGeneRegsTempFn, controlGeneRegsExternalTN, 'bed', genome)

    analysisDef = ('Categories differentially located in targets?: Which categories of track1-points fall more inside case than control track2-segments? [rawStatistic:=PointCountInsideSegsStat:]'
                   '[tf1:=SegmentToStartPointFormatConverter:] [tf2:=TrivialFormatConverter:]'
                   '-> DivergentRowsInCategoryMatrixStat')
    regSpec, binSpec = '*', '*'
    GalaxyInterface.runManual([tfTrackName, tcGeneRegsExternalTN], analysisDef, regSpec, binSpec,
                              genome, printResults=True, printHtmlWarningMsgs=False)
def getExternalTrackName(self):
    """Return the standard external track name for this object.

    The name part is obtained by passing ``self._galaxyFn`` to
    ``ExternalTrackManager.extractNameFromHistoryTN``; it is then combined
    with ``self.getId()`` through ``ExternalTrackManager.createStdTrackName``.
    """
    # Function-scope import, kept as in the original.
    from quick.application.ExternalTrackManager import ExternalTrackManager

    # Same evaluation order as before: name extraction first, then the id.
    historyName = ExternalTrackManager.extractNameFromHistoryTN(self._galaxyFn)
    ownId = self.getId()
    return ExternalTrackManager.createStdTrackName(ownId, historyName)