Python Pipeline.run示例，Adda.Pipeline.run Python示例

示例#1

0

显示文件

文件： adda.py 项目： AndreasHeger/adda

def indexGraph(infile, outfile):
    '''Set cmd to "index" and run the shared ADDA_STATEMENT template.

    Purpose as recorded by the original author: index the graph and store
    it in compressed format.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement, cmd, to_cluster and job_options (plus infile/outfile) from
    this function's locals -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "index"
    # Resource request string; handed on unchanged.
    job_options = "-l mem_free=50G"
    to_cluster = True
    P.run()

示例#2

0

显示文件

文件： adda.py 项目： AndreasHeger/adda

def indexSequences(infile, outfile):
    '''Set cmd to "sequences" and run the shared ADDA_STATEMENT template.

    Purpose as recorded by the original author: index the sequence
    database and map it to internal identifiers.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement, cmd and to_cluster (plus infile/outfile) from this
    function's locals -- confirm against the Pipeline module.
    '''
    to_cluster = True
    statement = ADDA_STATEMENT
    cmd = "sequences"
    P.run()

示例#3

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def importReference(infile, outfile):
    '''Import reference domains and families for one track.

    The track name is the base name of ``infile`` with everything from the
    first dot onwards removed. Table names are derived from the track, and
    the families file name is ``infile`` with "domains" replaced by
    "families".

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    # Companion file holding the family descriptions.
    filename_families = re.sub("domains", "families", infile)

    # Track name: base name of the input up to its first dot.
    track = re.sub("[.].*", "", os.path.basename(infile))

    tablename_domains, tablename_families = (
        "nrdb40_%s_domains" % track,
        "nrdb40_%s_families" % track,
    )

    statement = '''
    python %(scriptsdir)s/DomainsReference.py 
		--Database=%(database)s
		--domains=%(database)s.%(tablename_domains)s_src
		--families=%(database)s.%(tablename_families)s_src
		--mapped_domains=%(database)s.%(tablename_domains)s
		--mapped_families=%(database)s.%(tablename_families)s
		--input=%(infile)s
		--descriptions=%(filename_families)s
		--source=%(database)s.%(eval_tablename_adda_nids)s
	  Create UpdateDomains MakeNonRedundantClone 
    > %(outfile)s
    '''

    P.run()

示例#4

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildOverlapTable(infiles, outfile):
    '''Compute the overlap between the different sources of domains.

    Runs set_diff.py with --add-percent over all input files and redirects
    its output to ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    statement = '''
    python %(scriptsdir)s/set_diff.py --add-percent %(infiles)s > %(outfile)s
    '''
    # Collapse the input list into one space-separated argument string.
    infiles = " ".join(infiles)
    P.run()

示例#5

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def collectADDASequences(infile, outfile):
    '''Make the ADDA sequences available uncompressed at ``outfile``.

    A ``.gz`` input is decompressed with gunzip; any other input is
    symlinked to ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals -- confirm against the
    Pipeline module.
    '''
    statement = (
        '''gunzip < %(infile)s > %(outfile)s'''
        if infile.endswith(".gz")
        else '''ln -s %(infile)s %(outfile)s'''
    )

    P.run()

示例#6

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def exportResults( infile, outfile ):
    '''Bundle the ADDA result files into a gzipped tar archive.

    The command template archives the paths named by the ``output_result``,
    ``output_families`` and ``output_summary`` placeholders into
    ``outfile``. ``infile`` is not referenced by the template.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    
    statement = '''
    tar -cvzf %(outfile)s 
          %(output_result)s
          %(output_families)s
          %(output_summary)s
    '''
    P.run()

示例#7

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def reindexSequences( infile, outfile ):
    '''Rebuild the adda sequence database from adda.nids.

    An awk filter skips lines starting with "nid" and prints column 1 as a
    ">" header line followed by column 5. The result is piped into
    IndexedFasta.py, whose output is written to ``<outfile>.log``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    # Strip as many trailing characters as ".fasta" is long; this does not
    # check that outfile actually ends with ".fasta".
    database = outfile[:-len(".fasta")]
    # In the literal below, "\\n" yields a backslash-n for awk and "%%" is
    # an escaped percent sign for %-style expansion.
    statement = '''
    awk '!/^nid/ { printf(">%%s\\n%%s\\n", $1, $5)};' 
    < %(infile)s
    | python %(scriptsdir)s/IndexedFasta.py %(database)s -
    > %(outfile)s.log'''

    P.run()

示例#8

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def collectTargetSequences( infiles, outfile ):
    '''Extract new sequences from the input.

    ``infiles`` must unpack into exactly two items: the target FASTA file
    and the ADDA FASTA file, which is passed as --filename-reference.
    map_fasta2fasta.py is given the output pattern ``target.%s`` and its
    stdout goes to ``<outfile>.log``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
        
    filename_target, filename_adda = infiles
    # "%%s" is an escaped percent sign, leaving a literal "%s" in the command.
    statement = '''
	python %(scriptsdir)s/map_fasta2fasta.py 
		--filename-reference=%(filename_adda)s
                --output-filename-pattern=target.%%s
		%(filename_target)s > %(outfile)s.log
    '''

    P.run()

示例#9

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildBlatIndex( infiles, outfile):
    '''Build a blat index.

    Runs blat in protein mode with -makeOoc=<outfile> over all input
    files. ``map_min_identity`` is interpolated as an integer (``%i``).
    stdin is taken from /dev/null and stdout is appended to
    ``<outfile>.log``.

    NOTE(review): ``<outfile>.log`` also appears as a positional argument
    to blat before the redirections -- confirm that this is intended.
    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    # Collapse the input list into one space-separated argument string.
    infiles = " ".join( infiles )

    statement = '''
    blat -dots=100 -prot 
                -makeOoc=%(outfile)s 
		-minIdentity=%(map_min_identity)i
		%(infiles)s %(outfile)s.log < /dev/null >> %(outfile)s.log
    '''

    P.run()

示例#10

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def exportPfam( infile, outfile ):
    '''Export ADDA results as a dated, gzipped tar archive.

    A staging directory named after today's local date (``YYYY_MM_DD``) is
    created. It receives ``adda.fasta``, built from columns 1 and 5 of the
    ``output_nids`` file with lines starting with "nid" skipped, and a
    symlink to ``../adda.result``. The directory is archived into
    ``outfile`` and then removed. ``infile`` is not referenced by the
    template.

    NOTE(review): the awk printf writes no ">" before the identifier, so
    adda.fasta has no FASTA header marker -- confirm this is intended.
    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    # time.strftime() formats the current local time when no time tuple
    # is given.
    outdir = time.strftime("%Y_%m_%d")

    # Fix: the output_nids placeholder was missing its "s" conversion
    # character, which makes the %-style template malformed.
    statement = '''
    mkdir %(outdir)s;
    awk '!/^nid/ {printf("%%s\\n%%s\\n", $1, $5);}' < %(output_nids)s > %(outdir)s/adda.fasta;
    ln -s ../adda.result %(outdir)s/adda.result;
    tar -cvzf %(outfile)s %(outdir)s;
    rm -rf %(outdir)s
    '''
    P.run()

示例#11

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def splitSequenceFile( infile, outfiles ):
    '''Split a sequence file into chunks with split_fasta.pl.

    The input is fed on stdin; chunks are written using the pattern
    ``blat.dir/chunk_%s.fasta`` with ``map_chunksize`` interpolated as an
    integer. The script's stdout goes to ``split.log``. ``outfiles`` is
    not referenced by the template.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    # Workaround for a ruffus bug: the input may arrive wrapped in a list.
    # isinstance() also accepts list subclasses, unlike a type() comparison.
    if isinstance(infile, list):
        infile = infile[0]

    statement = '''
       perl %(scriptsdir)s/split_fasta.pl 
            -a blat.dir/chunk_%%s.fasta %(map_chunksize)i
            < %(infile)s > split.log
       '''

    P.run()

示例#12

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildIndirectDomains( infiles, outfile ):
    '''Collect domains that were mapped via BLAT.

    All input files are concatenated and piped through
    substitute_tokens.py using the map file ``target.new2new.map`` on
    column 1, with --invert and --filter; the result is written to
    ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    
    # Collapse the input list into one space-separated argument string.
    infiles = " ".join(infiles)
    # The backslash at the end of the --invert line is a Python
    # string-literal line continuation: the newline after it is not part
    # of the resulting string.
    statement = '''
	cat %(infiles)s |
	python %(scriptsdir)s/substitute_tokens.py 
		--apply=target.new2new.map
		--column=1 
		--invert \
		--filter > %(outfile)s
    '''
    P.run()

示例#13

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildMappingCoverage( infiles, outfile ):
    '''Compute the coverage of target sequences with ADDA domains.

    ``infiles`` must unpack into exactly two items: the domains file, fed
    to adda2coverage.py on stdin, and the lengths file, passed as
    --filename-lengths. Output goes to ``outfile``, the log to
    ``<outfile>.log``, and extra files follow the pattern ``<outfile>_%s``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    
    filename_domains, filename_lengths = infiles

    # "%%s" is an escaped percent sign, leaving a literal "%s" in the command.
    statement = '''
    python %(scriptsdir)s/adda2coverage.py 
		--log=%(outfile)s.log 
		--filename-lengths=%(filename_lengths)s 
                --output-filename-pattern="%(outfile)s_%%s"
    < %(filename_domains)s 
    > %(outfile)s
    '''
    P.run()

示例#14

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def importADDAResults( infile, outfile ):
    '''Import ADDA results.

    Runs DomainsAdda.py with the actions Create, Finalize and
    UpdateDomains. The target tables are
    ``nrdb40_<tablename_adda>_domains`` and
    ``nrdb40_<tablename_adda>_families`` in ``database``, reading from
    ``infile``; the script's stdout is written to ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    statement = '''
	python %(scriptsdir)s/DomainsAdda.py 
		--Database=%(database)s
		--domains=%(database)s.nrdb40_%(tablename_adda)s_domains
		--families=%(database)s.nrdb40_%(tablename_adda)s_families
		--input=%(infile)s
		--source=%(database)s.%(eval_tablename_adda_nids)s
		Create Finalize UpdateDomains 
       > %(outfile)s
    '''
    P.run()

示例#15

0

显示文件

文件： addaFarm.py 项目： Rfam/rfam-website

def mapDomains( infile, outfile ):
    '''Collect blat matching stats (variant that submits through bsub).

    The quoted pipeline decompresses ``infile`` and feeds it to
    map_blat2adda.py with the domains file ``adda.results``, writing to
    ``outfile``. The whole pipeline is wrapped in an interactive
    ``bsub -I`` call on queue "normal" whose job output goes to
    ``<infile>.out2``.

    NOTE(review): to_cluster and job_options are set even though the
    command already submits itself via bsub -- confirm how P.run() treats
    them.
    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    to_cluster= True
    job_options = "-l mem_free=4000M"
    # "%%s" is an escaped percent sign, leaving a literal "%s" in the command.
    statement = '''bsub -q normal -R"select[mem>4000] rusage[mem=4000]" -M 4000000 -o %(infile)s.out2 -I "gunzip 
        < %(infile)s 
	| python %(scriptsdir)s/map_blat2adda.py 
		--filename-domains=adda.results
		--output-filename-pattern="%(outfile)s.%%s"
		--log=%(outfile)s.log 
		--verbose=2 
        > %(outfile)s"
        '''
    P.run()

示例#16

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def mapDomains( infile, outfile ):
    '''Collect blat matching stats.

    Decompresses ``infile`` and pipes it into map_blat2adda.py. The
    domains file is supplied through a ``<( ... )`` process substitution
    that gunzips ``map_filename_domains``. Output goes to ``outfile``, the
    log to ``<outfile>.log``, and extra files follow the pattern
    ``<outfile>.%s``.

    NOTE(review): ``<( ... )`` needs a shell that supports process
    substitution -- confirm which shell P.run() uses.
    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` (and reads to_cluster / job_options) from this
    function's locals -- confirm against the Pipeline module.
    '''

    to_cluster= True
    job_options = "-l mem_free=4000M"
    # "%%s" is an escaped percent sign, leaving a literal "%s" in the command.
    statement = '''gunzip 
        < %(infile)s 
	| python %(scriptsdir)s/map_blat2adda.py 
		--filename-domains=<( gunzip < %(map_filename_domains)s)
		--output-filename-pattern="%(outfile)s.%%s" 
		--log=%(outfile)s.log 
		--verbose=2 
        > %(outfile)s
        '''
    P.run()

示例#17

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildDirectDomains( infiles, outfile ):
    '''Collect domains that could be transferred without mapping.

    ``infiles`` must unpack into exactly two items; only the second (the
    gzipped domains file) is used. It is decompressed and piped through
    substitute_tokens.py using the map file ``target.new2old.map`` on
    column 1, with --invert and --filter; the result is written to
    ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''
    
    # x (the first input) is not referenced by the template.
    x, filename_domains = infiles

    statement = '''gunzip
        < %(filename_domains)s 
	| python %(scriptsdir)s/substitute_tokens.py 
		--apply=target.new2old.map 
		--invert 
		--column=1 
		--filter 
	> %(outfile)s
    '''
    P.run()

示例#18

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def runBlat( infiles, outfile ):
    '''Run a blat job.

    ``infiles`` must unpack into exactly two items, ``infile`` and
    ``fasta``. blat is run in protein mode with ``-ooc=5.ooc`` and
    ``-noHead``; ``fasta`` comes first and ``infile`` second on the
    command line. Results go to stdout and are gzipped into ``outfile``.
    ``map_min_identity`` is interpolated as an integer (``%i``).

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` (and reads to_cluster) from this function's locals and
    the pipeline configuration -- confirm against the Pipeline module.
    '''

    to_cluster = True
    infile, fasta = infiles
    statement = '''
    blat  
	  -prot
	  -ooc=5.ooc
	  -noHead
	  -minIdentity=%(map_min_identity)i 
	  %(fasta)s
	  %(infile)s
          stdout | gzip > %(outfile)s
    '''
    
    P.run()

示例#19

0

显示文件

文件： addaFarm.py 项目： Rfam/rfam-website

def runBlat( infiles, outfile ):
    '''Run a blat job (variant that submits through bsub).

    ``infiles`` must unpack into exactly two items, ``infile`` and
    ``fasta``. The quoted command runs blat in protein mode, writing to
    ``<infile>.out``; on success that file is gzipped into ``outfile``.
    The command is wrapped in an interactive ``bsub -I`` call on queue
    "normal" whose job output goes to ``<fasta>.out``.

    NOTE(review): to_cluster is set even though the command already
    submits itself via bsub -- confirm how P.run() treats it.
    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    to_cluster = True
    infile, fasta = infiles
    statement = '''bsub -q normal -R"select[mem>4000] rusage[mem=4000]" -M 4000000 -o %(fasta)s.out -I 
    "blat  
	  -prot
	  -ooc=5.ooc
	  -noHead
	  -minIdentity=%(map_min_identity)i 
	  %(fasta)s
	  %(infile)s
    %(infile)s.out &&
    cat %(infile)s.out | gzip > %(outfile)s"
    '''
    
    P.run()

示例#20

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def importADDAIntermediateResults( infile, outfile ):
    '''Import the segmentation segments.

    Nids are translated: ``infile`` is passed through adda_translate.py
    (with --nids taken from ``eval_filename_adda_nids``) and then loaded
    by csv2db.py into a table indexed on ``nid``. The loader's stdout is
    written to ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    # Table name: outfile with as many trailing characters removed as
    # ".import" is long; the suffix itself is not checked.
    table = outfile[:-len(".import")]

    statement = '''
    python %(scriptsdir)s/adda_translate.py
       --nids=%(eval_filename_adda_nids)s
    < %(infile)s
    | python %(scriptsdir)s/csv2db.py 
        %(csv2db_options)s
        --database=%(database)s
	--table=%(table)s 
	--index=nid 
    > %(outfile)s
    '''
    
    P.run()

示例#21

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def importSequences( infile, outfile ):
    '''Import sequences.

    This command will also create the database. Three statements are run
    in order:

    1. drop the MySQL database named by ``load_database`` if it exists;
    2. create that database;
    3. rename header tokens with perl and load the result with csv2db.py
       into a table indexed on ``nid``, writing the loader's stdout to
       ``outfile``.

    NOTE(review): steps 1 and 2 use ``load_database`` while step 3 uses
    ``database`` -- confirm both refer to the same database.
    NOTE(review): P.run() takes no arguments; it presumably expands the
    current ``statement`` from this function's locals and the pipeline
    configuration on each call -- confirm against the Pipeline module.
    '''

    # Step 1: remove any existing database.
    statement = '''
         mysql %(mysql_options)s -e "DROP DATABASE IF EXISTS %(load_database)s"
    '''
    	
    P.run()

    # Step 2: create it afresh.
    statement = '''
         mysql %(mysql_options)s -e "CREATE database %(load_database)s"
    '''
    	
    P.run()

    # Table name: outfile with as many trailing characters removed as
    # ".import" is long; the suffix itself is not checked.
    table = outfile[:-len(".import")]

    # Step 3: on every line perl rewrites the first "nid" to "adda_nid",
    # then the first "pid" to "nid", before the data is loaded.
    statement ='''
        perl -p -e "s/nid/adda_nid/; s/pid/nid/" 
        < %(infile)s 
	| python %(scriptsdir)s/csv2db.py 
        %(csv2db_options)s
           --database=%(database)s
	   --table=%(table)s 
	   --index=nid 
        > %(outfile)s
    '''

    P.run()

示例#22

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def annotateADDA( infile, outfile ):
    '''Annotate ADDA families with reference families.

    Three statements are run in order:

    1. OutputStatisticsClustering.py with the ``Annotation`` action,
       comparing the ADDA domain/family tables with the reference tables
       of the track; its output is written to ``outfile``;
    2. calculate_selectivity.pl over ``outfile``, writing
       ``<outfile>.selectivity``;
    3. calculate_sensitivity.pl over ``outfile``, writing
       ``<outfile>.sensitivity``.

    ``infile`` is not referenced by any of the templates.

    NOTE(review): P.run() takes no arguments; it presumably expands the
    current ``statement`` from this function's locals and the pipeline
    configuration on each call -- confirm against the Pipeline module.
    '''

    # Track name: outfile with as many trailing characters removed as
    # ".annotations" is long; the suffix itself is not checked.
    track = outfile[:-len(".annotations")]

    # eval_max_family_size is interpolated as an integer (%i).
    statement = '''
        python %(scriptsdir)s/OutputStatisticsClustering.py 
                --Database=%(database)s
		--domains=%(database)s.nrdb40_%(tablename_adda)s_domains 
		--families=%(database)s.nrdb40_%(tablename_adda)s_families
		--max_family=%(eval_max_family_size)i
		--min_evidence=2 
                --min_units=2 
		--ref_domains=%(database)s.nrdb40_%(track)s_domains 
		--ref_families=%(database)s.nrdb40_%(track)s_families
	        --full-table 
		Annotation 
        > %(outfile)s
        '''
    
    P.run()

    # Post-process the annotation table: selectivity.
    statement = '''
        perl %(scriptsdir)s/calculate_selectivity.pl < %(outfile)s > %(outfile)s.selectivity
    '''

    P.run()

    # Post-process the annotation table: sensitivity.
    statement = '''
        perl %(scriptsdir)s/calculate_sensitivity.pl < %(outfile)s > %(outfile)s.sensitivity
    '''

    P.run()

示例#23

0

显示文件

文件： adda_annotate.py 项目： AndreasHeger/adda

def importSequences( infile, outfile ):
    '''Import sequences into the database.

    This command will also create the database. Three statements are run
    in order:

    1. drop the MySQL database named by ``load_database`` if it exists;
    2. create that database;
    3. rename header tokens with perl and load the result with csv2db.py
       into the table named by ``load_tablename_adda_nrdb``, mapping
       ``nid`` to int and indexing on it; the loader's stdout is written
       to ``outfile``.

    NOTE(review): P.run() takes no arguments; it presumably expands the
    current ``statement`` from this function's locals and the pipeline
    configuration on each call -- confirm against the Pipeline module.
    '''

    # Step 1: remove any existing database.
    statement = '''
         mysql %(load_mysql_options)s -e "DROP DATABASE IF EXISTS %(load_database)s"
    '''
    	
    P.run()

    # Step 2: create it afresh.
    statement = '''
         mysql %(load_mysql_options)s -e "CREATE database %(load_database)s"
    '''
    	
    P.run()

    # Step 3: on every line perl rewrites the first "nid" to "adda_nid",
    # then the first "pid" to "nid", before the data is loaded.
    statement ='''
        perl -p -e "s/nid/adda_nid/; s/pid/nid/" 
        < %(infile)s 
	| python %(scriptsdir)s/csv2db.py 
        %(load_csv2db_options)s
           --database=%(load_database)s
	   --table=%(load_tablename_adda_nrdb)s 
           --map=nid:int
	   --index=nid 
        > %(outfile)s
    '''

    P.run()

示例#24

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def evaluateDomains( infile, outfile ):
    '''Benchmark domains.

    The domain benchmark checks if the appropriate domains have
    been selected by the optimisation method.

    Runs evaluate_domain_boundaries.py with the track's reference table
    (``nrdb40_<track>_domains``) and the table named by
    ``eval_tablename_domains`` as --parts. Output goes to ``outfile`` and
    extra files follow the pattern ``<outfile>.%s``. ``infile`` is not
    referenced by the template.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    # Track name: outfile with as many trailing characters removed as
    # "_domains.eval" is long; the suffix itself is not checked.
    track = outfile[:-len("_domains.eval")]

    # "%%s" is an escaped percent sign, leaving a literal "%s" in the command.
    statement = '''
    python %(scriptsdir)s/evaluate_domain_boundaries.py 
        --database=%(database)s
        --reference=%(database)s.nrdb40_%(track)s_domains
        --parts=%(database)s.%(eval_tablename_domains)s
        --output-filename-pattern=%(outfile)s.%%s
        --switch 
        --skip-repeats 
        --bin-size=1
    > %(outfile)s
    '''

    P.run()

示例#25

0

显示文件

文件： adda_annotate.py 项目： Rfam/rfam-website

def evaluateSegments( infile, outfile ):
    '''Evaluate ADDA segments against the reference.

    The tree benchmark checks whether the segmentation algorithm
    contains the appropriate reference domains.

    Runs evaluate_domain_boundaries.py with the track's reference table
    (``nrdb40_<track>_domains``) and the table named by
    ``eval_tablename_segments`` as --trees, adding --no-full-length.
    Output goes to ``outfile`` and extra files follow the pattern
    ``<outfile>.%s``. ``infile`` is not referenced by the template.

    NOTE(review): P.run() takes no arguments; it presumably expands
    ``statement`` from this function's locals and the pipeline
    configuration -- confirm against the Pipeline module.
    '''

    # Track name: outfile with as many trailing characters removed as
    # "_segments.eval" is long; the suffix itself is not checked.
    track = outfile[:-len("_segments.eval")]

    # "%%s" is an escaped percent sign, leaving a literal "%s" in the command.
    statement = '''
    python %(scriptsdir)s/evaluate_domain_boundaries.py 
        --database=%(database)s 
        --reference=%(database)s.nrdb40_%(track)s_domains
        --trees=%(database)s.%(eval_tablename_segments)s
        --output-filename-pattern=%(outfile)s.%%s
        --switch 
        --skip-repeats 
        --no-full-length 
        --bin-size=1
    > %(outfile)s
    '''

    P.run()

示例#26

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def alignDomains(infile, outfile):
    '''Set cmd to "align" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "align"
    P.run()

示例#27

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def buildGraphStats(infile, outfile):
    '''Set cmd to "stats" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "stats"
    P.run()

示例#28

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def segmentSequences(infile, outfile):
    '''Set cmd to "segment" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "segment"
    P.run()

示例#29

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def convertToDomainGraph(infile, outfile):
    '''Set cmd to "convert" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "convert"
    P.run()

示例#30

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def optimiseSegments(infile, outfile):
    '''Set cmd to "optimise" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "optimise"
    P.run()

示例#31

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def computeMSTComponents(infile, outfile):
    '''Set cmd to "mst-components" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "mst-components"
    P.run()

示例#32

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def buildMST(infile, outfile):
    '''Set cmd to "mst" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "mst"
    P.run()

示例#33

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def indexGraph(infile, outfile):
    '''Set cmd to "index" and run the shared ADDA_STATEMENT template.

    Purpose as recorded by the original author: index the graph and store
    it in compressed format.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "index"
    P.run()

示例#34

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def buildFamilies(infile, outfile):
    '''Set cmd to "families" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "families"
    P.run()

示例#35

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def clusterDomains(infile, outfile):
    '''Set cmd to "cluster" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "cluster"
    P.run()

示例#36

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildFamilies(infile, outfile):
    '''Set cmd to "families" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "families"
    P.run()

示例#37

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def buildAddaSummary(infile, outfile):
    '''Set cmd to "summary" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "summary"
    P.run()

示例#38

0

显示文件

文件： adda.py 项目： Rfam/rfam-website

def computeParameters(infile, outfile):
    '''Set cmd to "fit" and run the shared ADDA_STATEMENT template.

    Purpose as recorded by the original author: pre-process graph.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "fit"
    P.run()

示例#39

0

显示文件

文件： adda.py 项目： ProteinsWebTeam/Pfam

def clusterDomains(infile, outfile):
    '''Set cmd to "cluster" and run the shared ADDA_STATEMENT template.

    NOTE(review): P.run() takes no arguments; it presumably reads
    statement and cmd (plus infile/outfile) from this function's locals
    -- confirm against the Pipeline module.
    '''
    statement = ADDA_STATEMENT
    cmd = "cluster"
    P.run()