Пример #1
0
    def findOverrepresentedTFsFromGeneSet(genome, tfSource, ensembleGeneIdList,upFlankSize, downFlankSize, geneSource, galaxyFn):
        #galaxyFn = '/usit/insilico/web/lookalike/galaxy_dist-20090924-dev/database/files/003/dataset_3347.dat'
        #print 'overriding galaxyFN!: ', galaxyFn
        galaxyId = extractIdFromGalaxyFn(galaxyFn)
        uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn))

        assert genome == 'hg18'
        
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        tfTrackName = tfTrackNameMappings[tfSource]
        
        
        #Get gene track
        assert geneSource == 'Ensembl'
        targetGeneRegsTempFn = uniqueWebPath + os.sep + 'geneRegs.bed'
        geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
        geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
        GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, targetGeneRegsTempFn )
        
        assert upFlankSize == downFlankSize == 0 #Should instead extend regions to include flanks
        
        tcGeneRegsTempFn = uniqueWebPath + os.sep + 'tcGeneRegs.targetcontrol.bedgraph'
        #Think this will be okay, subtraction not necessary as targets are put first:
        controlGeneRegsTempFn = geneRegsFn
        #print targetGeneRegsTempFn, controlGeneRegsTempFn, tcGeneRegsTempFn
        GalaxyInterface.combineToTargetControl(targetGeneRegsTempFn, controlGeneRegsTempFn, tcGeneRegsTempFn)
        
        #tcGeneRegsExternalTN = ['external'] +galaxyId +  [tcGeneRegsTempFn]
        tcGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc')
        
        #tcGeneRegsExternalTN = ['external'] +targetGalaxyId +  [tcGeneRegsTempFn]
        #tcGeneRegsExternalTN = ['galaxy', externalId, tcGeneRegsTempFn]
        
        targetGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc', '1')
        controlGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc', '0')
        
        #pre-process
        print 'Pre-processing file: %s, with trackname: %s ' % (tcGeneRegsTempFn, tcGeneRegsExternalTN)
        ExternalTrackManager.preProcess(tcGeneRegsTempFn, tcGeneRegsExternalTN, 'targetcontrol.bedgraph',genome)
        print 'Pre-processing TN: ', targetGeneRegsExternalTN
        ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed',genome)
        print 'Pre-processing TN: ', controlGeneRegsExternalTN
        ExternalTrackManager.preProcess(controlGeneRegsTempFn, controlGeneRegsExternalTN, 'bed',genome)
        
        #print tcGeneRegsExternalTN
        trackName1, trackName2 = tfTrackName, tcGeneRegsExternalTN
        
        analysisDef = 'Categories differentially located in targets?: Which categories of track1-points fall more inside case than control track2-segments? [rawStatistic:=PointCountInsideSegsStat:]' +\
                  '[tf1:=SegmentToStartPointFormatConverter:] [tf2:=TrivialFormatConverter:]' +\
                  '-> DivergentRowsInCategoryMatrixStat'
        regSpec, binSpec = '*','*'
        
        #print 'skipping preproc!!'
        #ExternalTrackManager.preProcess(tcGeneRegsExternalTN[-1], tcGeneRegsExternalTN, 'targetcontrol.bedgraph', genome)
        #ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed', genome)
        
        GalaxyInterface.runManual([trackName1, trackName2], analysisDef, regSpec, binSpec, genome, printResults=True, printHtmlWarningMsgs=False)
Пример #2
0
    def findOverrepresentedTFsFromGeneSet(genome, tfSource, ensembleGeneIdList,
                                          upFlankSize, downFlankSize,
                                          geneSource, galaxyFn):
        #galaxyFn = '/usit/insilico/web/lookalike/galaxy_dist-20090924-dev/database/files/003/dataset_3347.dat'
        #print 'overriding galaxyFN!: ', galaxyFn
        galaxyId = extractIdFromGalaxyFn(galaxyFn)
        uniqueWebPath = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()

        assert genome == 'hg18'

        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        tfTrackName = tfTrackNameMappings[tfSource]

        #Get gene track
        assert geneSource == 'Ensembl'
        targetGeneRegsTempFn = uniqueWebPath + os.sep + 'geneRegs.bed'
        geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
        geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
        GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName,
                                                 ensembleGeneIdList,
                                                 targetGeneRegsTempFn)

        assert upFlankSize == downFlankSize == 0  #Should instead extend regions to include flanks

        tcGeneRegsTempFn = uniqueWebPath + os.sep + 'tcGeneRegs.targetcontrol.bedgraph'
        #Think this will be okay, subtraction not necessary as targets are put first:
        controlGeneRegsTempFn = geneRegsFn
        #print targetGeneRegsTempFn, controlGeneRegsTempFn, tcGeneRegsTempFn
        GalaxyInterface.combineToTargetControl(targetGeneRegsTempFn,
                                               controlGeneRegsTempFn,
                                               tcGeneRegsTempFn)

        #tcGeneRegsExternalTN = ['external'] +galaxyId +  [tcGeneRegsTempFn]
        tcGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(
            galaxyId + ['tempTc'])

        #tcGeneRegsExternalTN = ['external'] +targetGalaxyId +  [tcGeneRegsTempFn]
        #tcGeneRegsExternalTN = ['galaxy', externalId, tcGeneRegsTempFn]

        targetGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(
            galaxyId + ['tempTc', '1'])
        controlGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(
            galaxyId + ['tempTc', '0'])

        #pre-process
        print 'Pre-processing file: %s, with trackname: %s ' % (
            tcGeneRegsTempFn, tcGeneRegsExternalTN)
        ExternalTrackManager.preProcess(tcGeneRegsTempFn, tcGeneRegsExternalTN,
                                        'targetcontrol.bedgraph', genome)
        print 'Pre-processing TN: ', targetGeneRegsExternalTN
        ExternalTrackManager.preProcess(targetGeneRegsTempFn,
                                        targetGeneRegsExternalTN, 'bed',
                                        genome)
        print 'Pre-processing TN: ', controlGeneRegsExternalTN
        ExternalTrackManager.preProcess(controlGeneRegsTempFn,
                                        controlGeneRegsExternalTN, 'bed',
                                        genome)

        #print tcGeneRegsExternalTN
        trackName1, trackName2 = tfTrackName, tcGeneRegsExternalTN

        analysisDef = 'Categories differentially located in targets?: Which categories of track1-points fall more inside case than control track2-segments? [rawStatistic:=PointCountInsideSegsStat:]' +\
                  '[tf1:=SegmentToStartPointFormatConverter:] [tf2:=TrivialFormatConverter:]' +\
                  '-> DivergentRowsInCategoryMatrixStat'
        regSpec, binSpec = '*', '*'

        #print 'skipping preproc!!'
        #ExternalTrackManager.preProcess(tcGeneRegsExternalTN[-1], tcGeneRegsExternalTN, 'targetcontrol.bedgraph', genome)
        #ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed', genome)

        GalaxyInterface.runManual([trackName1, trackName2],
                                  analysisDef,
                                  regSpec,
                                  binSpec,
                                  genome,
                                  printResults=True,
                                  printHtmlWarningMsgs=False)