def _makeMasterMan(target, source, env):
    """Write the package-level metadata file from a pickled metadata hash.

    Follows the (target, source, env) signature of an SCons action function.
    The first source is unpickled and three ``\\tag{content}`` lines are
    written to the target: ``name``, ``title`` and ``description``.

    Args:
        target: Target argument(s), resolved through ``sfle.ts``.
        source: Source argument(s), resolved through ``sfle.ts``; the first
            one must be a pickle file holding a mapping with optional
            ``"title"`` and ``"gloss"`` entries.
        env: Unused here.

    Returns:
        None.
    """
    strT, astrSs = sfle.ts(target, source)

    # Pickle streams are bytes: open in binary mode (required on Python 3,
    # harmless on Python 2) and close the handle deterministically.
    # NOTE(review): pickle.load can execute arbitrary code from the stream;
    # this assumes the file is produced by the build itself - confirm.
    with open(astrSs[0], "rb") as fileMeta:
        pHash = pickle.load(fileMeta)

    def _metaStr(strDescription, strContent):
        # One "\tag{content}" entry.
        return "\\" + strDescription + "{" + strContent + "}"

    strDataAccession = arepa.cwd() + "-package"
    # Missing or empty metadata entries fall back to an empty string.
    strDataTitle = pHash.get("title") or ""
    strDataGloss = pHash.get("gloss") or ""

    aastrOut = [
        ("name", strDataAccession),
        ("title", strDataTitle),
        ("description", strDataGloss),
    ]
    with open(strT, "w") as outputf:
        for strDescription, strContent in aastrOut:
            outputf.write(_metaStr(strDescription, strContent) + "\n")
import arepa import gzip import os import re import sfle import sys def test( iLevel, strID, hashArgs ): return ( iLevel == 1 ) and ( strID.find( "GDS" ) == 0 ) if "testing" in locals( ): sys.exit( ) pE = DefaultEnvironment( ) c_strID = arepa.cwd( ) c_fileIDTXT = sfle.d( pE, c_strID + ".txt" ) c_fileIDSOFTGZ = sfle.d( pE, c_strID + ".soft.gz" ) Import( "hashArgs" ) #=============================================================================== # Download SOFT file #=============================================================================== sfle.download( pE, hashArgs["c_strURLGDS"] + os.path.basename( str(c_fileIDSOFTGZ) ) ) def funcGPLsTXT( target, source, env ): strT, astrSs = sfle.ts( target, source ) setstrGPLs = set() for strLine in gzip.open( astrSs[0] ):
def funcGeneIDMapping(pE, fileDATin, strGeneFrom=None, fileLOGout=None, strMAPin=None, aiCOL=[0, 1], iSkip=0, iLevel=2):
    """Set up the gene-ID mapping step for ``fileDATin``.

    A mapping file is chosen by the first rule that yields one:
      1. ``strMAPin`` as given (unless it parses as an integer, in which
         case it is taken as a taxonomy ID instead),
      2. a manual map named after ``c_strID``, else the first top-level
         map, else an automatically generated map named after ``c_strID``,
      3. a file under ``c_strPathUniprotKO`` whose name contains the
         taxonomy ID delimited by non-digits,
      4. ``arepa.get_mappingfile(strTaxID)``,
      5. the ``c_pHashBridgeDB`` entry whose key occurs in the organism name.

    Args:
        pE: Environment object; ``Precious`` and ``Depends`` are called on it.
        fileDATin: Input data file (converted with ``str``).
        strGeneFrom: Source gene-ID type, passed as ``-f``. When empty it is
            looked up in ``m_hashGeneIDs`` by organism name; if still empty
            ``-x`` is passed instead.
        fileLOGout: Optional log file, passed as ``-l``.
        strMAPin: Mapping file path, or a value that ``int()`` accepts,
            which is then used as the taxonomy ID.
        aiCOL: Column indices; ``str(aiCOL)`` is passed as ``-c``. The
            default list is never mutated here.
        iSkip: Passed as ``-s``.
        iLevel: Number of leading space-separated words of the organism
            name used as the ``m_hashGeneIDs`` key.

    Returns:
        The result of ``pE.Depends`` tying the mapper operation to the
        gene-mapper child build.
    """
    # A value that parses as an integer is a taxonomy ID, not a file name.
    try:
        int(strMAPin)
    except (ValueError, TypeError):
        strTaxID = None
    else:
        strTaxID, strMAPin = strMAPin, None

    # Try to find taxid
    if not strTaxID:
        astrMatch = re.findall(r'taxid_([0-9]+)', c_strID)
        strTaxID = astrMatch[0].strip() if astrMatch else None

    # If strGeneFrom is not specified, try to retrieve it from the
    # "manual_geneids.txt" file in the respective etc directory, else None.
    # This problem tends to arise in mixed microbial network data.
    if not strGeneFrom and strTaxID and m_hashGeneIDs:
        astrOrgWords = arepa.taxid2org(strTaxID).split(" ")
        strGeneFrom = m_hashGeneIDs.get(" ".join(astrOrgWords[:iLevel]))

    if not strMAPin:
        strDirAuto = c_strDirData if (c_strID == c_strPathRepo) else ""
        strAutoMAPtmp = sfle.d(strDirAuto, c_strID + c_strSufMap)
        strAutoMAP = strAutoMAPtmp if os.path.exists(strAutoMAPtmp) else None
        # Use manual mapping files, files that are deeper down have priority;
        # else try the automatically generated mapping file.
        afileMaps = Glob(sfle.d(c_strPathTopMapping, "*" + c_strSufMap))
        strTopMAP = str(afileMaps[0]) if afileMaps else None
        strMAPManTmp = sfle.d(c_strDirManMap, c_strID + c_strSufMap)
        strManMAP = strMAPManTmp if os.path.exists(strMAPManTmp) else None
        strMAPin = strManMAP or strTopMAP or strAutoMAP

    # Use provided mapping files
    if not strMAPin and strTaxID:
        for fileMAPname in Glob(sfle.d(c_strPathUniprotKO, "*" + c_strSufMap)):
            if re.search(r'\D' + strTaxID + r'\D', str(fileMAPname)):
                strMAPin = str(fileMAPname)
                break

    # Else ask arepa to figure out an appropriate mapping file
    if not strMAPin and strTaxID:
        strMAPin = arepa.get_mappingfile(strTaxID)

    # Else use BridgeDB files. The organism name does not change between
    # iterations, so resolve it once; skip the lookup entirely when the
    # table is empty, as no iteration would have needed it.
    if not strMAPin and strTaxID and c_pHashBridgeDB:
        strOrgName = arepa.taxid2org(strTaxID)
        for strSpeciesName, strBridgeMAP in c_pHashBridgeDB.items():
            if strSpeciesName in strOrgName:
                strMAPin = strBridgeMAP
                break

    # Output name: <base><c_strMapped><counter><ext>
    strBase, strExt = os.path.splitext(str(fileDATin))
    strCount = funcCounter(g_iterCounter)
    strT = strBase + c_strMapped + strCount + strExt

    # Argument list handed to sfle.op. NOTE(review): the nested
    # [..] / [True, ..] items presumably mark input and output files in
    # sfle.op's convention - confirm.
    aastrArgs = [[str(fileDATin)], [True, strT], "-c", str(aiCOL)]
    aastrArgs += ["-f", strGeneFrom] if strGeneFrom else ["-x"]
    aastrArgs += ["-t", c_astrGeneTo[0]]
    aastrArgs += ["-s", iSkip]
    if fileLOGout:
        aastrArgs += ["-l", [True, str(fileLOGout)]]
    if strMAPin:
        aastrArgs += ["-m", [strMAPin]]

    pE.Precious(strMAPin)
    pMapped = sfle.op(pE, c_funcGeneMapper, aastrArgs)
    # Only a mapping file outside the current directory is handed to the
    # child build.
    fInCwd = sfle.in_directory(strMAPin, os.path.abspath(arepa.cwd()))
    astrExtra = None if fInCwd else [strMAPin]
    return pE.Depends(
        pMapped,
        sfle.scons_child(pE, c_strPathGeneMapper, None, None, None, astrExtra))
substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import arepa import csv import re import sfle import sys c_repo = arepa.cwd() c_strTaxid = c_repo + "_taxid_" c_strMode = "mode_" def symbol(hashSymbols, strValue): return hashSymbols.setdefault(strValue, len(hashSymbols)) if len(sys.argv) < 1: raise Exception("Usage: string2c.py [taxa] < <string.txt>") iMin = int(sys.argv[1]) strTaxa = None if (len(sys.argv) <= 2) else sys.argv[2] setTaxa = arepa.taxa(strTaxa)
import sfle
import sys
import ftplib
import re


def test(iLevel, strID, hashArgs):
    """Tell whether this script applies: level 3 and an ID starting with "GSE".

    ``hashArgs`` is accepted but not consulted.
    """
    return (iLevel == 3) and strID.startswith("GSE")


# When executed only to obtain test(), stop before any build setup.
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# Dataset ID: the current directory name without the "-RAW" marker.
c_strID = arepa.cwd().replace("-RAW", "")

# Location of the supplementary sample files.
c_strURLGEO = 'ftp.ncbi.nih.gov'
c_strURLGEOsupp = 'pub/geo/DATA/supplementary/samples/'
c_strURLSupp = "".join(('ftp://', c_strURLGEO, '/', c_strURLGEOsupp))

# Files in the parent directory.
c_strFileGSM = "../GSM.txt"
c_strFilePCL = "".join(("../", c_strID, ".pcl"))
c_listTs = sfle.readcomment(c_strFileGSM)

# R programs under the repository's source directory.
c_fileProgReadCel = sfle.d(
    pE, arepa.path_repo(), sfle.c_strDirSrc, "readCel.R")
c_fileProgProcessRaw = sfle.d(
    pE, arepa.path_repo(), sfle.c_strDirSrc, "preprocessRaw.R")

# The output .RData name is the input name with "-RAW" removed.
c_strInputRData = arepa.cwd() + ".RData"
c_strOutputRData = c_strInputRData.replace("-RAW", "")

c_filePPfun = sfle.d(
    pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import arepa import os import sfle import sys c_afileSConscripts = sorted( Glob(sfle.d(arepa.path_repo(), sfle.c_strDirSrc, "SConscript*"))) if os.path.isfile("SConscript"): exec(compile(open("SConscript").read(), "SConscript", 'exec')) else: hashArgs = {} # I tried very hard to do this using import, but I can't find a way to prematurely # halt an import without sys.exit, which kills the entire process. for fileSConscript in c_afileSConscripts: strSConscript = fileSConscript.get_abspath() hashEnv = {"test": lambda *a: False, "testing": True} try: exec(compile(open(strSConscript).read(), strSConscript, 'exec'), hashEnv) except SystemExit: pass if hashEnv["test"](arepa.level(), arepa.cwd(), hashArgs): exec(compile(open(strSConscript).read(), strSConscript, 'exec'))