示例#1
0
文件: adda.py 项目: AndreasHeger/adda
def indexGraph(infile, outfile):
    '''Index the graph and store it in compressed format.

    Runs the shared ``ADDA_STATEMENT`` template with ``cmd`` set to
    "index", ``to_cluster`` set to True and ``job_options`` set to
    "-l mem_free=50G".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement``, ``cmd``, ``to_cluster`` and ``job_options``
    from this function's local variables -- confirm against the
    Pipeline module. The local names must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "index"
    job_options, to_cluster = "-l mem_free=50G", True
    P.run()
示例#2
0
文件: adda.py 项目: AndreasHeger/adda
def indexSequences(infile, outfile ):
    '''Index the sequence database and map to internal identifiers.

    Runs the shared ``ADDA_STATEMENT`` template with ``cmd`` set to
    "sequences" and ``to_cluster`` set to True.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement``, ``cmd`` and ``to_cluster`` from this function's
    local variables -- confirm against the Pipeline module. The local
    names must therefore stay as they are.
    '''
    to_cluster = True
    statement, cmd = ADDA_STATEMENT, "sequences"
    P.run()
示例#3
0
def importReference( infile, outfile ):
    '''Import reference domains.

    The track name is the basename of ``infile`` with everything from
    the first "." onwards removed. It names the two tables
    ``nrdb40_<track>_domains`` and ``nrdb40_<track>_families``. The
    families description file is ``infile`` with "domains" replaced by
    "families". The statement runs DomainsReference.py and redirects
    its standard output to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    track = re.sub("[.].*", "", os.path.basename(infile ) )

    tablename_domains = "nrdb40_%s_domains" % track
    tablename_families = "nrdb40_%s_families" % track
    # re.sub replaces every occurrence of "domains" in the path,
    # directory components included.
    filename_families = re.sub( "domains", "families", infile )

    # The *_src tables receive the raw import, the tables without the
    # suffix are passed as --mapped_domains / --mapped_families.
    statement = '''
    python %(scriptsdir)s/DomainsReference.py 
		--Database=%(database)s
		--domains=%(database)s.%(tablename_domains)s_src
		--families=%(database)s.%(tablename_families)s_src
		--mapped_domains=%(database)s.%(tablename_domains)s
		--mapped_families=%(database)s.%(tablename_families)s
		--input=%(infile)s
		--descriptions=%(filename_families)s
		--source=%(database)s.%(eval_tablename_adda_nids)s
	  Create UpdateDomains MakeNonRedundantClone 
    > %(outfile)s
    '''

    P.run()
示例#4
0
文件: adda.py 项目: Rfam/rfam-website
def buildOverlapTable( infiles, outfile ):
    '''Calculate overlap between the different sources of domains.

    ``infiles`` is an iterable of file names; they are joined with
    spaces and passed as positional arguments to set_diff.py
    (``--add-percent``), whose standard output goes to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''
    # rebind infiles to a single space-separated string for the template
    infiles = " ".join(infiles)
    statement = '''
    python %(scriptsdir)s/set_diff.py --add-percent %(infiles)s > %(outfile)s
    '''
    P.run()
示例#5
0
文件: adda.py 项目: Rfam/rfam-website
def collectADDASequences( infile, outfile ):
    '''Make the ADDA sequences available as ``outfile``.

    An input whose name ends in ".gz" is decompressed with gunzip into
    ``outfile``; any other input is symlinked to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables -- confirm against the Pipeline module.
    '''
    statement = ( '''ln -s %(infile)s %(outfile)s'''
                  if not infile.endswith(".gz")
                  else '''gunzip < %(infile)s > %(outfile)s''' )

    P.run()
示例#6
0
文件: adda.py 项目: Rfam/rfam-website
def exportResults( infile, outfile ):
    '''Export ADDA results as a gzipped tar archive.

    Archives the files named by the ``output_result``,
    ``output_families`` and ``output_summary`` placeholders into
    ``outfile``. ``infile`` is not referenced by the statement.

    NOTE(review): the three output_* names are not locals of this
    function; presumably P.run() resolves them from pipeline
    configuration -- confirm against the Pipeline module.
    '''

    statement = '''
    tar -cvzf %(outfile)s 
          %(output_result)s
          %(output_families)s
          %(output_summary)s
    '''
    P.run()
示例#7
0
文件: adda.py 项目: Rfam/rfam-website
def reindexSequences( infile, outfile ):
    '''Rebuild the adda sequence database from adda.nids.

    Every line of ``infile`` that does not start with "nid" is turned
    by awk into a two-line record (">" + column 1, then column 5) and
    piped into IndexedFasta.py; that script's standard output goes to
    ``<outfile>.log``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration (with %%s
    collapsing to a literal %s) -- confirm against the Pipeline module.
    '''

    # Drop the last len(".fasta") characters to get the database prefix;
    # the suffix itself is not checked.
    database = outfile[:-len(".fasta")]
    # "\\n" in this non-raw literal is a backslash followed by "n", so
    # awk receives the escape sequence rather than a real newline.
    statement = '''
    awk '!/^nid/ { printf(">%%s\\n%%s\\n", $1, $5)};' 
    < %(infile)s
    | python %(scriptsdir)s/IndexedFasta.py %(database)s -
    > %(outfile)s.log'''

    P.run()
示例#8
0
文件: adda.py 项目: Rfam/rfam-website
def collectTargetSequences( infiles, outfile ):
    '''Extract new sequences from input.

    ``infiles`` must unpack into exactly two names: the target fasta
    file and the ADDA fasta file. map_fasta2fasta.py is run on the
    target file with the ADDA file as ``--filename-reference`` and
    ``target.%s`` as output filename pattern; its standard output goes
    to ``<outfile>.log``.

    NOTE(review): the statement never writes ``outfile`` itself, only
    ``<outfile>.log``; presumably one of the pattern-named outputs is
    the task target -- confirm. P.run() is called without arguments and
    presumably fills the %(name)s placeholders from local variables and
    pipeline configuration -- confirm against the Pipeline module.
    '''

    filename_target, filename_adda = infiles
    statement = '''
	python %(scriptsdir)s/map_fasta2fasta.py 
		--filename-reference=%(filename_adda)s
                --output-filename-pattern=target.%%s
		%(filename_target)s > %(outfile)s.log
    '''

    P.run()
示例#9
0
文件: adda.py 项目: Rfam/rfam-website
def buildBlatIndex( infiles, outfile):
    '''Build blat index.

    ``infiles`` is an iterable of file names; they are joined with
    spaces and handed to blat in protein mode with
    ``-makeOoc=<outfile>``. Standard input is /dev/null and standard
    output is appended to ``<outfile>.log``.

    NOTE(review): ``<outfile>.log`` is both the last positional
    argument of blat and the target of the ``>>`` redirection --
    confirm this is intended. P.run() is called without arguments and
    presumably fills the %(name)s placeholders from local variables and
    pipeline configuration -- confirm against the Pipeline module.
    '''
    # rebind infiles to a single space-separated string for the template
    infiles = " ".join( infiles )

    # %(map_min_identity)i uses integer conversion, so the configured
    # value must be numeric.
    statement = '''
    blat -dots=100 -prot 
                -makeOoc=%(outfile)s 
		-minIdentity=%(map_min_identity)i
		%(infiles)s %(outfile)s.log < /dev/null >> %(outfile)s.log
    '''

    P.run()
示例#10
0
文件: adda.py 项目: Rfam/rfam-website
def exportPfam( infile, outfile ):
    '''Export ADDA results as a dated, gzipped tar archive.

    Creates a directory named after the current local date
    (``YYYY_MM_DD``), writes ``adda.fasta`` into it from the file named
    by the ``output_nids`` placeholder (lines starting with "nid" are
    skipped; columns 1 and 5 are emitted on separate lines), symlinks
    ``../adda.result`` into it, archives the directory into ``outfile``
    and finally removes the directory.

    ``infile`` is not referenced by the statement.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration using
    printf-style formatting -- confirm against the Pipeline module.
    NOTE(review): unlike reindexSequences-style awk commands, the printf
    format here emits no ">" before the identifier, so ``adda.fasta`` has
    no fasta header marker -- confirm whether that is intended.
    '''

    # strftime without a time tuple formats the current local time.
    outdir = time.strftime( "%Y_%m_%d" )

    # Fix: the original wrote "%(output_nids)" without a conversion
    # type, which makes %-interpolation fail on the following
    # characters; the placeholder needs the trailing "s".
    statement = '''
    mkdir %(outdir)s;
    awk '!/^nid/ {printf("%%s\\n%%s\\n", $1, $5);}' < %(output_nids)s > %(outdir)s/adda.fasta;
    ln -s ../adda.result %(outdir)s/adda.result;
    tar -cvzf %(outfile)s %(outdir)s;
    rm -rf %(outdir)s
    '''
    P.run()
示例#11
0
文件: adda.py 项目: Rfam/rfam-website
def splitSequenceFile( infile, outfiles ):
    '''Split a fasta file into chunks under ``blat.dir``.

    ``infile`` is fed on standard input to split_fasta.pl, which is
    given the output pattern ``blat.dir/chunk_%s.fasta`` and the chunk
    size from the ``map_chunksize`` placeholder; the script's standard
    output goes to ``split.log``. ``outfiles`` is not referenced by the
    statement.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    # Work around ruffus occasionally passing the single input wrapped
    # in a sequence: use its first element. isinstance also covers
    # list subclasses and tuples, which would otherwise be interpolated
    # into the shell command as their repr.
    if isinstance(infile, (list, tuple)):
        infile = infile[0]

    # %(map_chunksize)i uses integer conversion, so the configured
    # value must be numeric.
    statement = '''
       perl %(scriptsdir)s/split_fasta.pl 
            -a blat.dir/chunk_%%s.fasta %(map_chunksize)i
            < %(infile)s > split.log
       '''

    P.run()
示例#12
0
文件: adda.py 项目: Rfam/rfam-website
def buildIndirectDomains( infiles, outfile ):
    '''Collect domains that were mapped via BLAT.

    ``infiles`` is an iterable of file names; they are joined with
    spaces, concatenated with ``cat`` and piped through
    substitute_tokens.py, which applies ``target.new2new.map`` to
    column 1 with ``--invert`` and ``--filter``. The result is written
    to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    # rebind infiles to a single space-separated string for the template
    infiles = " ".join(infiles)
    # The backslash after --invert sits inside a non-raw string literal,
    # so Python treats backslash-newline as a line continuation:
    # --invert and --filter end up on the same line of the statement.
    statement = '''
	cat %(infiles)s |
	python %(scriptsdir)s/substitute_tokens.py 
		--apply=target.new2new.map
		--column=1 
		--invert \
		--filter > %(outfile)s
    '''
    P.run()
示例#13
0
文件: adda.py 项目: Rfam/rfam-website
def buildMappingCoverage( infiles, outfile ):
    '''Compute coverage of target sequences with ADDA domains.

    ``infiles`` must unpack into exactly two names: the domains file
    (fed on standard input) and the sequence lengths file (passed as
    ``--filename-lengths``). adda2coverage.py logs to
    ``<outfile>.log``, uses ``<outfile>_%s`` as output filename pattern
    and writes its standard output to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    filename_domains, filename_lengths = infiles

    statement = '''
    python %(scriptsdir)s/adda2coverage.py 
		--log=%(outfile)s.log 
		--filename-lengths=%(filename_lengths)s 
                --output-filename-pattern="%(outfile)s_%%s"
    < %(filename_domains)s 
    > %(outfile)s
    '''
    P.run()
示例#14
0
def importADDAResults( infile, outfile ):
    '''Import ADDA results.

    Runs DomainsAdda.py with ``infile`` as ``--input``, targeting the
    tables ``nrdb40_<tablename_adda>_domains`` and
    ``nrdb40_<tablename_adda>_families`` in the configured database,
    with the actions ``Create Finalize UpdateDomains``. The script's
    standard output goes to ``outfile``.

    NOTE(review): ``database``, ``tablename_adda`` and
    ``eval_tablename_adda_nids`` are not locals of this function;
    presumably P.run() resolves them from pipeline configuration --
    confirm against the Pipeline module.
    '''

    statement = '''
	python %(scriptsdir)s/DomainsAdda.py 
		--Database=%(database)s
		--domains=%(database)s.nrdb40_%(tablename_adda)s_domains
		--families=%(database)s.nrdb40_%(tablename_adda)s_families
		--input=%(infile)s
		--source=%(database)s.%(eval_tablename_adda_nids)s
		Create Finalize UpdateDomains 
       > %(outfile)s
    '''
    P.run()
示例#15
0
def mapDomains( infile, outfile ):
    '''Collect blat matching stats.

    Submits an interactive ``bsub`` job that decompresses ``infile``
    and pipes it through map_blat2adda.py with ``adda.results`` as the
    domains file, ``<outfile>.%s`` as output filename pattern and
    ``<outfile>.log`` as log; the script's standard output goes to
    ``outfile``. The bsub output goes to ``<infile>.out2``.

    NOTE(review): the statement calls bsub explicitly while
    ``to_cluster`` is also True and ``job_options`` is set -- confirm
    the job is not submitted twice. P.run() is called without arguments
    and presumably reads ``statement``, ``to_cluster`` and
    ``job_options`` from the local variables -- confirm against the
    Pipeline module.
    '''

    to_cluster= True
    job_options = "-l mem_free=4000M"
    # The double quote opened after "-I" closes on the last line of the
    # statement, so the whole pipeline is a single bsub argument.
    statement = '''bsub -q normal -R"select[mem>4000] rusage[mem=4000]" -M 4000000 -o %(infile)s.out2 -I "gunzip 
        < %(infile)s 
	| python %(scriptsdir)s/map_blat2adda.py 
		--filename-domains=adda.results
		--output-filename-pattern="%(outfile)s.%%s"
		--log=%(outfile)s.log 
		--verbose=2 
        > %(outfile)s"
        '''
    P.run()
示例#16
0
文件: adda.py 项目: Rfam/rfam-website
def mapDomains( infile, outfile ):
    '''Collect blat matching stats.

    Decompresses ``infile`` and pipes it through map_blat2adda.py. The
    domains file is supplied through a ``<( ... )`` substitution that
    gunzips the file named by the ``map_filename_domains`` placeholder.
    Output files follow the pattern ``<outfile>.%s``, the log goes to
    ``<outfile>.log`` and standard output to ``outfile``.

    NOTE(review): ``<( ... )`` is a process substitution, which needs a
    shell such as bash -- confirm the shell used by P.run(). P.run() is
    called without arguments and presumably reads ``statement``,
    ``to_cluster`` and ``job_options`` from the local variables --
    confirm against the Pipeline module.
    '''

    to_cluster= True
    job_options = "-l mem_free=4000M"
    statement = '''gunzip 
        < %(infile)s 
	| python %(scriptsdir)s/map_blat2adda.py 
		--filename-domains=<( gunzip < %(map_filename_domains)s)
		--output-filename-pattern="%(outfile)s.%%s" 
		--log=%(outfile)s.log 
		--verbose=2 
        > %(outfile)s
        '''
    P.run()
示例#17
0
文件: adda.py 项目: Rfam/rfam-website
def buildDirectDomains( infiles, outfile ):
    '''Collect domains that could be transferred without mapping.

    ``infiles`` must unpack into exactly two items; only the second,
    the gzip-compressed domains file, is used. It is decompressed and
    piped through substitute_tokens.py, which applies
    ``target.new2old.map`` to column 1 with ``--invert`` and
    ``--filter``. The result is written to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    # x (the first input) is unpacked but never referenced afterwards.
    x, filename_domains = infiles

    statement = '''gunzip
        < %(filename_domains)s 
	| python %(scriptsdir)s/substitute_tokens.py 
		--apply=target.new2old.map 
		--invert 
		--column=1 
		--filter 
	> %(outfile)s
    '''
    P.run()
示例#18
0
文件: adda.py 项目: Rfam/rfam-website
def runBlat( infiles, outfile ):
    '''Run a blat job.

    ``infiles`` must unpack into exactly two names, ``infile`` and
    ``fasta``. blat is run in protein mode with ``-ooc=5.ooc`` and
    ``-noHead``; ``fasta`` is the first positional argument,
    ``infile`` the second, and the output is sent to stdout and
    gzip-compressed into ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``to_cluster`` from this function's local
    variables and fills the %(name)s placeholders from locals plus
    pipeline configuration -- confirm against the Pipeline module.
    '''

    to_cluster = True
    infile, fasta = infiles
    # %(map_min_identity)i uses integer conversion, so the configured
    # value must be numeric. The .ooc file name is hard-coded.
    statement = '''
    blat  
	  -prot
	  -ooc=5.ooc
	  -noHead
	  -minIdentity=%(map_min_identity)i 
	  %(fasta)s
	  %(infile)s
          stdout | gzip > %(outfile)s
    '''

    P.run()
示例#19
0
def runBlat( infiles, outfile ):
    '''Run a blat job through an interactive bsub submission.

    ``infiles`` must unpack into exactly two names, ``infile`` and
    ``fasta``. Inside the bsub job, blat is run in protein mode with
    ``-ooc=5.ooc`` and ``-noHead``; ``fasta`` is the first positional
    argument, ``infile`` the second, and ``<infile>.out`` the output.
    If blat succeeds, ``<infile>.out`` is gzip-compressed into
    ``outfile``. The bsub output goes to ``<fasta>.out``.

    NOTE(review): the statement calls bsub explicitly while
    ``to_cluster`` is also True -- confirm the job is not submitted
    twice. P.run() is called without arguments and presumably reads
    ``statement`` and ``to_cluster`` from the local variables --
    confirm against the Pipeline module.
    '''

    to_cluster = True
    infile, fasta = infiles
    # The double quote opened before "blat" closes after the gzip
    # redirection, so blat && cat | gzip is a single bsub argument.
    statement = '''bsub -q normal -R"select[mem>4000] rusage[mem=4000]" -M 4000000 -o %(fasta)s.out -I 
    "blat  
	  -prot
	  -ooc=5.ooc
	  -noHead
	  -minIdentity=%(map_min_identity)i 
	  %(fasta)s
	  %(infile)s
    %(infile)s.out &&
    cat %(infile)s.out | gzip > %(outfile)s"
    '''

    P.run()
示例#20
0
def importADDAIntermediateResults( infile, outfile ):
    '''Import the segmentation segments.

    Nids are translated: ``infile`` is piped through adda_translate.py
    (with the nids file from the ``eval_filename_adda_nids``
    placeholder) and then loaded with csv2db.py into a table indexed on
    ``nid``. The csv2db.py output goes to ``outfile``.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    # Table name is outfile minus its last len(".import") characters;
    # the suffix itself is not checked.
    table = outfile[:-len(".import")]

    statement = '''
    python %(scriptsdir)s/adda_translate.py
       --nids=%(eval_filename_adda_nids)s
    < %(infile)s
    | python %(scriptsdir)s/csv2db.py 
        %(csv2db_options)s
        --database=%(database)s
	--table=%(table)s 
	--index=nid 
    > %(outfile)s
    '''

    P.run()
示例#21
0
def importSequences( infile, outfile ):
    '''Import sequences.

    This command will also create the database: it first drops the
    database named by the ``load_database`` placeholder if it exists,
    then creates it again, and finally loads ``infile`` with csv2db.py
    after renaming "nid" to "adda_nid" and "pid" to "nid" with perl.
    The csv2db.py output goes to ``outfile``.

    WARNING: the first statement is destructive -- it drops the whole
    database.

    NOTE(review): the database is dropped and created as
    ``load_database`` but the import uses ``--database=%(database)s``
    -- confirm both placeholders refer to the same database.
    P.run() is called without arguments and presumably reads the
    current ``statement`` and fills the %(name)s placeholders from
    local variables plus pipeline configuration -- confirm against the
    Pipeline module.
    '''

    # step 1: remove any previous database
    statement = '''
         mysql %(mysql_options)s -e "DROP DATABASE IF EXISTS %(load_database)s"
    '''

    P.run()

    # step 2: create an empty database
    statement = '''
         mysql %(mysql_options)s -e "CREATE database %(load_database)s"
    '''

    P.run()

    # Table name is outfile minus its last len(".import") characters;
    # the suffix itself is not checked.
    table = outfile[:-len(".import")]

    # step 3: the perl substitutions have no /g flag and no anchors, so
    # they rewrite the first "nid" and the first "pid" on every line.
    statement ='''
        perl -p -e "s/nid/adda_nid/; s/pid/nid/" 
        < %(infile)s 
	| python %(scriptsdir)s/csv2db.py 
        %(csv2db_options)s
           --database=%(database)s
	   --table=%(table)s 
	   --index=nid 
        > %(outfile)s
    '''

    P.run()
示例#22
0
def annotateADDA( infile, outfile ):
    '''Annotate ADDA families with reference families.

    Three statements are run in sequence:

    1. OutputStatisticsClustering.py (action ``Annotation``) compares
       the ADDA domain/family tables with the reference tables
       ``nrdb40_<track>_domains`` / ``nrdb40_<track>_families`` and
       writes to ``outfile``.
    2. calculate_selectivity.pl reads ``outfile`` and writes
       ``<outfile>.selectivity``.
    3. calculate_sensitivity.pl reads ``outfile`` and writes
       ``<outfile>.sensitivity``.

    ``infile`` is not referenced by any of the statements.

    NOTE(review): P.run() is called without arguments; presumably it
    reads the current ``statement`` and fills the %(name)s placeholders
    from this function's local variables plus pipeline configuration
    -- confirm against the Pipeline module.
    '''

    # Track is outfile minus its last len(".annotations") characters;
    # the suffix itself is not checked.
    track = outfile[:-len(".annotations")]

    # %(eval_max_family_size)i uses integer conversion, so the
    # configured value must be numeric.
    statement = '''
        python %(scriptsdir)s/OutputStatisticsClustering.py 
                --Database=%(database)s
		--domains=%(database)s.nrdb40_%(tablename_adda)s_domains 
		--families=%(database)s.nrdb40_%(tablename_adda)s_families
		--max_family=%(eval_max_family_size)i
		--min_evidence=2 
                --min_units=2 
		--ref_domains=%(database)s.nrdb40_%(track)s_domains 
		--ref_families=%(database)s.nrdb40_%(track)s_families
	        --full-table 
		Annotation 
        > %(outfile)s
        '''

    P.run()

    statement = '''
        perl %(scriptsdir)s/calculate_selectivity.pl < %(outfile)s > %(outfile)s.selectivity
    '''

    P.run()

    statement = '''
        perl %(scriptsdir)s/calculate_sensitivity.pl < %(outfile)s > %(outfile)s.sensitivity
    '''

    P.run()
示例#23
0
def importSequences( infile, outfile ):
    '''Import sequences into database.

    This command will also create the database: it first drops the
    database named by the ``load_database`` placeholder if it exists,
    then creates it again, and finally loads ``infile`` with csv2db.py
    into the table named by ``load_tablename_adda_nrdb`` after renaming
    "nid" to "adda_nid" and "pid" to "nid" with perl. The csv2db.py
    output goes to ``outfile``.

    WARNING: the first statement is destructive -- it drops the whole
    database.

    NOTE(review): none of the load_* names are locals of this function;
    presumably P.run() resolves them from pipeline configuration and
    reads the current ``statement`` from the local variables -- confirm
    against the Pipeline module.
    '''

    # step 1: remove any previous database
    statement = '''
         mysql %(load_mysql_options)s -e "DROP DATABASE IF EXISTS %(load_database)s"
    '''

    P.run()

    # step 2: create an empty database
    statement = '''
         mysql %(load_mysql_options)s -e "CREATE database %(load_database)s"
    '''

    P.run()

    # step 3: the perl substitutions have no /g flag and no anchors, so
    # they rewrite the first "nid" and the first "pid" on every line.
    statement ='''
        perl -p -e "s/nid/adda_nid/; s/pid/nid/" 
        < %(infile)s 
	| python %(scriptsdir)s/csv2db.py 
        %(load_csv2db_options)s
           --database=%(load_database)s
	   --table=%(load_tablename_adda_nrdb)s 
           --map=nid:int
	   --index=nid 
        > %(outfile)s
    '''

    P.run()
示例#24
0
def evaluateDomains( infile, outfile ):
    '''Benchmark domains.

    The domain benchmark checks if the appropriate domains have
    been selected by the optimisation method.

    evaluate_domain_boundaries.py is run with the reference table
    ``nrdb40_<track>_domains`` and, as ``--parts``, the table named by
    the ``eval_tablename_domains`` placeholder. Additional outputs
    follow the pattern ``<outfile>.%s``; standard output goes to
    ``outfile``. ``infile`` is not referenced by the statement.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    # Track is outfile minus its last len("_domains.eval") characters;
    # the suffix itself is not checked.
    track = outfile[:-len("_domains.eval")]

    statement = '''
    python %(scriptsdir)s/evaluate_domain_boundaries.py 
        --database=%(database)s
        --reference=%(database)s.nrdb40_%(track)s_domains
        --parts=%(database)s.%(eval_tablename_domains)s
        --output-filename-pattern=%(outfile)s.%%s
        --switch 
        --skip-repeats 
        --bin-size=1
    > %(outfile)s
    '''

    P.run()
示例#25
0
def evaluateSegments( infile, outfile ):
    '''Evaluate ADDA segments against reference.

    The tree benchmark checks whether the segmentation algorithm
    contains the appropriate reference domains.

    evaluate_domain_boundaries.py is run with the reference table
    ``nrdb40_<track>_domains`` and, as ``--trees``, the table named by
    the ``eval_tablename_segments`` placeholder, with
    ``--no-full-length`` set. Additional outputs follow the pattern
    ``<outfile>.%s``; standard output goes to ``outfile``. ``infile``
    is not referenced by the statement.

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and fills the %(name)s placeholders from this
    function's local variables plus pipeline configuration -- confirm
    against the Pipeline module.
    '''

    # Track is outfile minus its last len("_segments.eval") characters;
    # the suffix itself is not checked.
    track = outfile[:-len("_segments.eval")]

    statement = '''
    python %(scriptsdir)s/evaluate_domain_boundaries.py 
        --database=%(database)s 
        --reference=%(database)s.nrdb40_%(track)s_domains
        --trees=%(database)s.%(eval_tablename_segments)s
        --output-filename-pattern=%(outfile)s.%%s
        --switch 
        --skip-repeats 
        --no-full-length 
        --bin-size=1
    > %(outfile)s
    '''

    P.run()
示例#26
0
def alignDomains(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "align".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "align"
    P.run()
示例#27
0
def buildGraphStats(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "stats".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "stats"
    P.run()
示例#28
0
def segmentSequences(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "segment".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "segment"
    P.run()
示例#29
0
def convertToDomainGraph(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "convert".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "convert"
    P.run()
示例#30
0
def optimiseSegments(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "optimise".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "optimise"
    P.run()
示例#31
0
def computeMSTComponents(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "mst-components".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "mst-components"
    P.run()
示例#32
0
def buildMST(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "mst".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "mst"
    P.run()
示例#33
0
文件: adda.py 项目: Rfam/rfam-website
def indexGraph(infile, outfile):
    '''Index the graph and store it in compressed format.

    Runs the shared ``ADDA_STATEMENT`` template with ``cmd`` set to
    "index".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "index"
    P.run()
示例#34
0
def buildFamilies(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "families".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "families"
    P.run()
示例#35
0
文件: adda.py 项目: Rfam/rfam-website
def clusterDomains(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "cluster".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "cluster"
    P.run()
示例#36
0
文件: adda.py 项目: Rfam/rfam-website
def buildFamilies(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "families".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "families"
    P.run()
示例#37
0
文件: adda.py 项目: Rfam/rfam-website
def buildAddaSummary(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "summary".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "summary"
    P.run()
示例#38
0
文件: adda.py 项目: Rfam/rfam-website
def computeParameters(infile, outfile ):
    '''Pre-process the graph.

    Runs the shared ``ADDA_STATEMENT`` template with ``cmd`` set to
    "fit".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "fit"
    P.run()
示例#39
0
def clusterDomains(infile, outfile):
    '''Run the shared ADDA statement with ``cmd`` set to "cluster".

    NOTE(review): P.run() is called without arguments; presumably it
    reads ``statement`` and ``cmd`` from this function's local
    variables -- confirm against the Pipeline module. The local names
    must therefore stay as they are.
    '''
    statement, cmd = ADDA_STATEMENT, "cluster"
    P.run()