Пример #1
0
	def _makeMasterMan( target, source, env ):
		"""Write the package metadata lines (name, title, description) to the target.

		The signature follows the (target, source, env) action convention;
		`sfle.ts` is used to obtain the target path and the list of source
		paths (presumably plain strings -- confirm against sfle).

		The first source is a pickled mapping from which the optional
		"title" and "gloss" entries are read; missing or empty values are
		written as empty strings. Each output line has the form
		``\\<key>{<value>}``.
		"""
		strT, astrSs = sfle.ts( target, source )

		# Pickle streams are binary: open in "rb" and close the handle
		# deterministically instead of leaking it.
		# NOTE(review): pickle.load executes arbitrary code from the file;
		# this is only safe if the source pickle is produced by the pipeline
		# itself -- confirm.
		with open( astrSs[0], "rb" ) as inputf:
			pHash = pickle.load( inputf )

		def _metaStr( strDescription, strContent ):
			# Renders one metadata entry as \key{value}.
			return "\\"+ strDescription + "{" + strContent + "}"

		strDataAccession = arepa.cwd( ) + "-package"
		strDataTitle = pHash.get( "title" ) or ""
		strDataGloss = pHash.get( "gloss" ) or ""

		aastrOut = [("name", strDataAccession), ("title", strDataTitle), ("description", strDataGloss)]

		with open( strT, "w" ) as outputf:
			for strDescription, strContent in aastrOut:
				outputf.write( _metaStr( strDescription, strContent ) + "\n" )
Пример #2
0
import arepa
import gzip
import os
import re
import sfle
import sys

def test( iLevel, strID, hashArgs ):
	return ( iLevel == 1 ) and ( strID.find( "GDS" ) == 0 )
# When this script is executed with a name `testing` already defined, stop
# here: only the test() predicate above is wanted, not the build rules below.
if "testing" in locals( ):
	sys.exit( )

# DefaultEnvironment and Import are not defined in this file; they are
# expected to be injected into the namespace by the build tool running it.
pE = DefaultEnvironment( )

# The identifier is whatever arepa.cwd() reports; the two file objects are
# derived from it by appending the ".txt" and ".soft.gz" suffixes.
c_strID				= arepa.cwd( )
c_fileIDTXT			= sfle.d( pE, c_strID + ".txt" )
c_fileIDSOFTGZ		= sfle.d( pE, c_strID + ".soft.gz" )

# Brings `hashArgs` into this namespace; it must contain "c_strURLGDS".
Import( "hashArgs" )

#===============================================================================
# Download SOFT file
#===============================================================================

# The download URL is the configured base URL followed by the basename of the
# .soft.gz file declared above.
sfle.download( pE, hashArgs["c_strURLGDS"] + os.path.basename( str(c_fileIDSOFTGZ) ) )

def funcGPLsTXT( target, source, env ):
	strT, astrSs = sfle.ts( target, source )
	setstrGPLs = set()
	for strLine in gzip.open( astrSs[0] ):
Пример #3
0
def funcGeneIDMapping(pE,
                      fileDATin,
                      strGeneFrom=None,
                      fileLOGout=None,
                      strMAPin=None,
                      aiCOL=[0, 1],
                      iSkip=0,
                      iLevel=2):
    """Set up the gene-ID mapping step for `fileDATin` and return its node(s).

    Parameters:
        pE: build environment; used for `Precious`, `Depends` and passed on
            to the sfle helpers.
        fileDATin: input data file; its string form is the first argument to
            the mapper and the base of the output file name.
        strGeneFrom: source gene-ID type ("-f"); when falsy it is looked up
            in `m_hashGeneIDs` by organism name, and if still unset "-x" is
            passed instead.
        fileLOGout: optional log file passed with "-l".
        strMAPin: mapping file passed with "-m". If it can be converted with
            `int()`, it is treated as a taxonomy ID instead and the mapping
            file is chosen automatically.
        aiCOL: value passed (stringified) with "-c".
        iSkip: value passed with "-s".
        iLevel: number of space-separated words of the organism name used as
            the lookup key in `m_hashGeneIDs`.

    Returns:
        The result of `pE.Depends(...)` on the mapper operation.

    The output file name is `<base><c_strMapped><counter><ext>`, where the
    counter comes from `funcCounter(g_iterCounter)`.
    """
    # A numeric strMAPin is really a taxonomy ID, not a mapping file.
    try:
        int(strMAPin)
        strTaxID, strMAPin = strMAPin, None
    except (ValueError, TypeError):
        strTaxID = None
    # Try to find taxid
    if not strTaxID:
        astrMatch = re.findall(r'taxid_([0-9]+)', c_strID)
        strTaxID = astrMatch[0].strip() if astrMatch else None
    if not strGeneFrom:
        #if strGeneFrom is not specified, try to retrieve it from
        #the "manual_geneids.txt" file in the respective etc directory, else None
        #this problem tends to arise in mixed microbial network data
        if strTaxID and m_hashGeneIDs:
            # Key is the organism name truncated to its first iLevel words.
            strOrg = " ".join(arepa.taxid2org(strTaxID).split(" ")[:iLevel])
            strGeneFrom = m_hashGeneIDs.get(strOrg)
    if not (strMAPin):
        # No explicit mapping file: walk the fallbacks in order and keep the
        # first one that yields a value.
        strDirAuto = c_strDirData if (c_strID == c_strPathRepo) else ""
        strAutoMAPtmp = sfle.d(strDirAuto, c_strID + c_strSufMap)
        strAutoMAP = strAutoMAPtmp if os.path.exists(strAutoMAPtmp) else None
        # Use manual mapping files, files that are deeper down have priority
        #else try automatically generated mapping file
        afileMaps = Glob(sfle.d(c_strPathTopMapping, "*" + c_strSufMap))
        strTopMAP = str(afileMaps[0]) if afileMaps else None
        strMAPManTmp = sfle.d(c_strDirManMap, c_strID + c_strSufMap)
        # Priority: per-ID manual map, then top-level map, then auto map.
        strMAPin = (strMAPManTmp if os.path.exists(strMAPManTmp) else
                    None) or strTopMAP or strAutoMAP
        # Use provided mapping files
        if not (strMAPin) and strTaxID:
            for fileMAPname in Glob(
                    sfle.d(c_strPathUniprotKO, "*" + c_strSufMap)):
                # The taxid must be delimited by non-digits on both sides so
                # it does not match inside a longer number.
                if re.search(r'\D' + strTaxID + r'\D', str(fileMAPname)):
                    strMAPin = str(fileMAPname)
                    break
        # Else ask arepa to figure out an appropriate mapping file
        if not (strMAPin) and strTaxID:
            strMAPin = arepa.get_mappingfile(strTaxID)
        # Else use BridgeDB files
        if not (strMAPin) and strTaxID:
            for strSpeciesName in list(c_pHashBridgeDB.keys()):
                if strSpeciesName in arepa.taxid2org(strTaxID):
                    strMAPin = c_pHashBridgeDB[strSpeciesName]
                    break

    # Derive the output name from the input name plus a running counter.
    strBase, strExt = os.path.splitext(str(fileDATin))
    strCount = funcCounter(g_iterCounter)
    strT = strBase + c_strMapped + strCount + strExt

    # Assemble the mapper's argument list.
    # NOTE(review): the nested lists look like sfle.op's encoding for file
    # arguments ([path] for inputs, [True, path] for outputs) -- confirm.
    aastrPrefix = [[str(fileDATin)], [True, strT], "-c", str(aiCOL)]
    astrGeneFrom = ["-f", strGeneFrom] if strGeneFrom else ["-x"]
    astrGeneTo = ["-t", c_astrGeneTo[0]]
    astrSkip = ["-s", iSkip]
    afileLOGout = ["-l", [True, str(fileLOGout)]] if fileLOGout else []
    astrMapIn = ["-m", [strMAPin]] if strMAPin else []

    aastrArgs = aastrPrefix + astrGeneFrom + astrGeneTo + astrSkip + afileLOGout + astrMapIn

    # NOTE(review): strMAPin can still be None here when no fallback produced
    # a mapping file; confirm Precious() and sfle.in_directory() tolerate None.
    pE.Precious(strMAPin)
    # The mapping file is handed to scons_child only when it is not inside
    # the current working directory.
    return pE.Depends(
        sfle.op(pE, c_funcGeneMapper, aastrArgs),
        sfle.scons_child(
            pE, c_strPathGeneMapper, None, None,
            None, ([strMAPin] if not (sfle.in_directory(
                strMAPin, os.path.abspath(arepa.cwd()))) else None)))
Пример #4
0
substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

import arepa
import csv
import re
import sfle
import sys

# Name reported by arepa.cwd() for the current directory.
c_repo = arepa.cwd()
# String prefixes built once at import time; how they are consumed is not
# visible in this part of the file.
c_strTaxid = c_repo + "_taxid_"
c_strMode = "mode_"


def symbol(hashSymbols, strValue):
    """Return the integer code for `strValue`, assigning the next one if new.

    A value not yet present in `hashSymbols` is stored with the mapping's
    current size as its code; known values keep their existing code.
    """
    if strValue not in hashSymbols:
        hashSymbols[strValue] = len(hashSymbols)
    return hashSymbols[strValue]


# Command line: <min> [taxa]. sys.argv[0] is the script name, so at least two
# entries are required before sys.argv[1] can be read; the previous `< 1`
# check could never fire and a missing argument surfaced as an IndexError.
if len(sys.argv) < 2:
    raise Exception("Usage: string2c.py <min> [taxa] < <string.txt>")
# First argument must be an integer (int() raises ValueError otherwise).
iMin = int(sys.argv[1])
# Optional second argument; None when omitted.
strTaxa = None if (len(sys.argv) <= 2) else sys.argv[2]

setTaxa = arepa.taxa(strTaxa)
Пример #5
0
import sfle
import sys
import ftplib
import re


def test(iLevel, strID, hashArgs):
    return (iLevel == 3) and (strID.find("GSE") == 0)


# When this script is executed with a name `testing` already defined, stop
# here: only the test() predicate above is wanted, not the build setup below.
if "testing" in locals():
    sys.exit()

# DefaultEnvironment is not defined in this file; it is expected to be
# injected into the namespace by the build tool running it.
pE = DefaultEnvironment()

# Dataset ID: the name reported by arepa.cwd() with any "-RAW" removed.
c_strID = arepa.cwd().replace("-RAW", "")
# FTP host and path; c_strURLSupp is the full ftp:// URL built from the two.
c_strURLGEO = 'ftp.ncbi.nih.gov'
c_strURLGEOsupp = 'pub/geo/DATA/supplementary/samples/'
c_strURLSupp = 'ftp://' + c_strURLGEO + '/' + c_strURLGEOsupp
# Files located in the parent directory.
c_strFileGSM = "../GSM.txt"
c_strFilePCL = "../" + c_strID + ".pcl"

# Contents of GSM.txt as returned by sfle.readcomment.
c_listTs = sfle.readcomment(c_strFileGSM)
# R scripts located in the repository's source directory.
c_fileProgReadCel = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc,
                           "readCel.R")
c_fileProgProcessRaw = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc,
                              "preprocessRaw.R")
# .RData file names: the input keeps the "-RAW" part of the directory name,
# the output has it removed.
c_strInputRData = arepa.cwd() + ".RData"
c_strOutputRData = c_strInputRData.replace("-RAW", "")

# "preprocess" entry in the repository's etc directory.
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
Пример #6
0
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

import arepa
import os
import sfle
import sys

# Every SConscript* file in the repository's source directory, in sorted order.
c_afileSConscripts = sorted(
    Glob(sfle.d(arepa.path_repo(), sfle.c_strDirSrc, "SConscript*")))

# A local SConscript, when present, is executed in this namespace; otherwise
# hashArgs falls back to an empty dict.
# NOTE(review): when the local SConscript exists, hashArgs is only defined if
# that script assigns it -- confirm.
if os.path.isfile("SConscript"):
    # Read through a context manager so the file handle is closed promptly.
    with open("SConscript") as fileLocal:
        strLocalSource = fileLocal.read()
    exec(compile(strLocalSource, "SConscript", 'exec'))
else:
    hashArgs = {}

# I tried very hard to do this using import, but I can't find a way to prematurely
# halt an import without sys.exit, which kills the entire process.
for fileSConscript in c_afileSConscripts:
    strSConscript = fileSConscript.get_abspath()
    # Read and compile each script once; the same code object serves both the
    # probing run and the real run below.
    with open(strSConscript) as fileShared:
        pCode = compile(fileShared.read(), strSConscript, 'exec')
    # Probing run: execute in a scratch namespace that defines `testing`, so
    # the script can define test() and then bail out via sys.exit().
    hashEnv = {"test": lambda *a: False, "testing": True}
    try:
        exec(pCode, hashEnv)
    except SystemExit:
        pass
    # Real run: only scripts whose test() accepts the current level and
    # directory are executed in this namespace.
    if hashEnv["test"](arepa.level(), arepa.cwd(), hashArgs):
        exec(pCode)