# GeneMapper build configuration: module-level counter plus the paths and
# file names used by the gene-ID mapping steps.
# NOTE(review): `arepa` is used below but is not imported in this span; it is
# presumably imported (or injected into the namespace) before this point.
import itertools
import os
import pickle
import re
import sys
import time

import sfle

# Module-level counter starting at 0.
g_iterCounter = itertools.count(0)

c_strID = arepa.cwd()
c_strPathRepo = arepa.name_repo()

# Suffixes used when naming mapping outputs.
c_strSufMap = ".map"
c_strMapped = "_mapped"

# Per-repository directories.
c_strDirData = sfle.d(arepa.path_repo(), sfle.c_strDirData)
c_strDirManMap = sfle.d(arepa.path_repo(), sfle.c_strDirEtc, "manual_mapping")

# Target gene identifier(s), read from etc/geneid.  The fallback to the default
# gene-name type has to be applied to the *result* of readcomment(); the
# original placed the `or` inside the call, on the path argument, so the
# default could never be selected.
# NOTE(review): assumes sfle.readcomment() returns an empty (falsy) value when
# the file yields no entries - confirm against sfle.
c_astrGeneTo = sfle.readcomment(
    sfle.d(arepa.path_arepa(), sfle.c_strDirEtc, "geneid")
) or [arepa.genemap_genename()]

# GeneMapper installation layout.
c_strPathGeneMapper = sfle.d(arepa.path_arepa(), "GeneMapper")
c_strFileUnzipLog = sfle.d(c_strPathGeneMapper, sfle.c_strDirTmp, "unzip.log")
c_strFileCompileLog = sfle.d(
    c_strPathGeneMapper, sfle.c_strDirTmp, "compile.log")
c_strPathTopMapping = sfle.d(
    c_strPathGeneMapper, sfle.c_strDirEtc, "manual_mapping")
c_strPathUniprotKO = sfle.d(c_strPathGeneMapper, sfle.c_strDirEtc, "uniprotko")

# Helper programs.
c_fileProgMakeUnique = sfle.d(
    arepa.path_arepa(), sfle.c_strDirSrc, "makeunique.py")
c_funcGeneMapper = sfle.d(
    c_strPathGeneMapper, sfle.c_strDirSrc, "bridgemapper.py")
import sys import metadata import glob def test(iLevel, strID, hashArgs): return (iLevel == 1) if "testing" in locals(): sys.exit() pE = DefaultEnvironment() c_strID = arepa.cwd() c_fileInputC = sfle.d(pE, arepa.path_repo(), sfle.c_strDirTmp, "mpidbc") c_fileIDPKL = sfle.d(pE, c_strID + ".pkl") c_fileIDDAB = sfle.d(pE, c_strID + ".dab") c_fileIDQUANT = sfle.d(c_strID + ".quant") c_fileIDRawDAT = sfle.d(pE, c_strID + "_00raw.dat") c_fileIDDAT = sfle.d(pE, c_strID + ".dat") c_fileProgUnpickle = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc, "unpickle.py") c_fileProgC2Metadata = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc, "c2metadata.py") c_fileProgC2DAT = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc, "c2dat.py") c_fileInputManCurTXT = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "manual_curation/", c_strID + sfle.c_strSufTXT) c_fileInputSConscriptGM = sfle.d(pE, arepa.path_arepa(), sfle.c_strDirSrc,
annot2map.py: parse mapping files begins with !platform_table_begin ends with !platform_table_end """ import sfle import glob import csv import sys import re import arepa import gzip c_fileMapping = sfle.d(arepa.path_repo(), sfle.c_strDirEtc, "mapping") c_hashHead = { k:v for (k, v) in [[y.strip() for y in x.split("%")] for x in sfle.readcomment( open( c_fileMapping))] } if sfle.readcomment(open(c_fileMapping))\ else { "^ID .*? platform" : "Affy", "Entrez Gene Symbol" : "HGNC", "Uniprot .*? Symbol" : "Uniprot/TrEMBL", "^(Entrez)? UniGene Symbol" : "UniGene", "Entrez Unigene Identifier" : "UniGene_ID", "GenBank Accession" : "GB_ACC", "Entrez Gene identifier" : "Entrez Gene", "GenBank Identifier" : "GenBank" } iArg = len(sys.argv) strFileAnnotGZ = sys.argv[1] if iArg > 1 else None strFileOut = sys.argv[2] if iArg > 2 else None
#!/usr/bin/env python
# Build step that pipes the IDF input through idf2metadata.py to produce
# "<ID>.txt", passing the SDRF and ADF files as extra arguments.

import arepa
import sys


def test(iLevel, strID, hashArgs):
    """Return True when this script applies, i.e. at directory level 2.

    strID and hashArgs are accepted for interface compatibility but unused.
    """
    return iLevel == 2


# When a name "testing" is in scope, only test() is wanted: stop before any
# build setup runs.
if "testing" in locals():
    sys.exit()

# pE and hashArgs are expected to already be in scope; the explicit imports
# are left disabled as in the original.
#Import( "pE" )
#Import( "hashArgs" )

c_strID = arepa.cwd()

# Inputs handed over by the caller.
c_strInputIDSDRF = hashArgs["strFileIDSDRF"]
c_strInputIDIDF = hashArgs["strFileIDIDF"]
c_astrInputADFs = hashArgs["astrFileADFs"]

# Output file and the program that generates it.
c_strFileIDTXT = c_strID + ".txt"
c_strProgIDF2Metadata = arepa.d(
    arepa.path_repo(), arepa.c_strDirSrc, "idf2metadata.py")

# Each extra input is passed as a [True, path] pair.
# NOTE(review): the True flag presumably marks the argument as a file
# dependency - confirm against arepa.pipe.
arepa.pipe(
    pE,
    c_strInputIDIDF,
    c_strProgIDF2Metadata,
    c_strFileIDTXT,
    [[True, strFile] for strFile in ([c_strInputIDSDRF] + c_astrInputADFs)],
)

Default(c_strFileIDTXT)
# When a name "testing" is in scope, stop before any build setup runs.
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# The working-directory ID is split on "-" and its first two fields are taken
# as the GDS and GPL identifiers.
c_strID = arepa.cwd()
c_strGDS, c_strGPL = c_strID.split("-")[:2]

# --- Shared SConscripts ------------------------------------------------------
c_fileInputSConscript = sfle.d(
    pE, arepa.path_arepa(), sfle.c_strDirSrc, "SConscript_pcl-dab.py")
c_fileRSConscript = sfle.d(
    pE, arepa.path_arepa(), sfle.c_strDirSrc, "SConscript_rpackage.py")

# --- Inputs ------------------------------------------------------------------
c_fileInputSOFTGZ = sfle.d(pE, "../" + c_strGDS + ".soft.gz")
c_fileInputManCurTXT = sfle.d(
    pE,
    arepa.path_repo(),
    sfle.c_strDirEtc,
    "manual_curation/",
    c_strID + ".txt",
)

# First entry of the "preprocess" configuration file.
# NOTE(review): the [0] raises IndexError if readcomment() returns nothing;
# there is no fallback here.
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
c_strPPfun = sfle.readcomment(c_filePPfun)[0]

# --- Outputs in the current directory ----------------------------------------
c_fileTaxa = sfle.d(pE, "taxa.txt")
c_fileStatus = sfle.d(pE, "status.txt")
c_filePlatform = sfle.d(pE, "platform.txt")
c_fileIDMap = sfle.d(pE, c_strID + ".map")
c_fileIDMapRaw = sfle.d(pE, c_strID + "_raw.map")
c_fileIDPKL = sfle.d(pE, c_strID + ".pkl")
c_fileGPLTXTGZ = sfle.d(pE, c_strGPL + ".annot.gz")
c_fileIDRawPCL = sfle.d(pE, c_strID + "_00raw.pcl")
c_fileLogPackage = sfle.d(pE, "package")

# NOTE(review): "Pacakge" is misspelt, but the name is kept because code
# outside this span may reference it.
c_fileConfigPacakge = sfle.d(
    pE, arepa.path_repo(), sfle.c_strDirEtc, "rpackage")
import metadata
import re


def test(iLevel, strID, hashArgs):
    """Return True for level-2 directories whose ID begins with "GSE".

    hashArgs is accepted for interface compatibility but unused.
    """
    return iLevel == 2 and strID.startswith("GSE")


# When a name "testing" is in scope, only test() is wanted: stop before any
# build setup runs.
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()
c_strID = arepa.cwd()
c_strSufRPackage = "_rpackage"

# --- Files under the repository's tmp directory ------------------------------
c_fileGPL = sfle.d(pE, arepa.path_repo(), sfle.c_strDirTmp, "gpl.txt")
c_fileAnnot = sfle.d(pE, arepa.path_repo(), sfle.c_strDirTmp, "annot.txt")

# --- Shared scripts under the arepa source directory -------------------------
c_fileInputSConscript = sfle.d(
    pE, arepa.path_arepa(), sfle.c_strDirSrc, "SConscript_pcl-dab.py")
c_fileRSConscript = sfle.d(
    pE, arepa.path_arepa(), sfle.c_strDirSrc, "SConscript_rpackage.py")
c_fileProgUnpickle = sfle.d(
    pE, arepa.path_arepa(), sfle.c_strDirSrc, "unpickle.py")

# --- Repository-specific script and configuration ----------------------------
c_fileInputGSER = sfle.d(pE, arepa.path_repo(), sfle.c_strDirSrc, "gse.R")
c_fileInputManCurTXT = sfle.d(
    pE,
    arepa.path_repo(),
    sfle.c_strDirEtc,
    "manual_curation/",
    c_strID + ".txt",
)
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
c_fileRunRaw = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "raw")

# --- Output in the current directory -----------------------------------------
c_fileIDPKL = sfle.d(pE, c_strID + ".pkl")
# When a name "testing" is in scope, stop before any build setup runs.
if "testing" in locals():
    sys.exit()

# pE and hashArgs are expected to already be in scope; the explicit imports
# are left disabled as in the original.
#Import( "pE" )
#Import( "hashArgs" )

c_strID = arepa.cwd()
# NOTE(review): this unpacking requires the ID to contain exactly one "_".
c_strPrefix, c_strSuffix = c_strID.split("_")
c_strType = c_strID[2:6]

# Inputs handed over by the caller.
c_strInputIDSDRF = hashArgs["strFileIDSDRF"]
c_astrInputADFs = hashArgs["astrFileADFs"]

c_strInputSConscript = arepa.d(
    arepa.path_arepa(), arepa.c_strDirSrc, "SConscript_pcl-dab.py")
c_strFileIDRawPCL = c_strID + "_00raw.pcl"
c_strFileIDNormPCL = c_strID + "_01norm.pcl"
c_strProgSamples2PCL = arepa.d(
    arepa.path_repo(), arepa.c_strDirSrc, "samples2pcl.py")

# First processed-data file in the parent directory matching this ID.
# NOTE(review): raises IndexError when nothing matches the pattern.
c_fileInputData = Glob(
    "../" + c_strPrefix + "*processed-data*" + c_strSuffix + "*")[0]

#===============================================================================
# Calculate the final PCL + DAB
#===============================================================================

#- Map probe IDs and add PCL formatting
arepa.pipe(
    pE,
    c_fileInputData,
    c_strProgSamples2PCL,
    c_strFileIDRawPCL,
    [[True, s] for s in ([c_strInputIDSDRF] + c_astrInputADFs)],
)

# Run the shared PCL -> DAB SConscript in this namespace.  The source is read
# under a context manager so the file handle is closed deterministically; the
# exec itself stays at module scope so the script sees the same names as
# before.
with open(c_strInputSConscript) as fileSConscriptIn:
    strSConscriptSource = fileSConscriptIn.read()
exec(compile(strSConscriptSource, c_strInputSConscript, 'exec'))
# When a name "testing" is in scope, stop before any build setup runs.
if "testing" in locals():
    sys.exit()

pE = DefaultEnvironment()

# The dataset ID is the working-directory name with the "-RAW" marker removed.
c_strID = arepa.cwd().replace("-RAW", "")

# --- Base URL built from the GEO FTP host and its sample-supplement path -----
c_strURLGEO = 'ftp.ncbi.nih.gov'
c_strURLGEOsupp = 'pub/geo/DATA/supplementary/samples/'
c_strURLSupp = 'ftp://' + c_strURLGEO + '/' + c_strURLGEOsupp

# --- Files in the parent directory -------------------------------------------
c_strFileGSM = "../GSM.txt"
c_strFilePCL = "../" + c_strID + ".pcl"
c_listTs = sfle.readcomment(c_strFileGSM)

# --- R programs --------------------------------------------------------------
c_fileProgReadCel = sfle.d(
    pE, arepa.path_repo(), sfle.c_strDirSrc, "readCel.R")
c_fileProgProcessRaw = sfle.d(
    pE, arepa.path_repo(), sfle.c_strDirSrc, "preprocessRaw.R")

# --- RData names: the input keeps "-RAW", the output drops it ----------------
c_strInputRData = arepa.cwd() + ".RData"
c_strOutputRData = c_strInputRData.replace("-RAW", "")

# Preprocessing function: first entry of the "preprocess" configuration file,
# or "affy::rma" when the file yields nothing.  The file is read once; the
# original read it twice, once for the emptiness test and once for the value.
c_filePPfun = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "preprocess")
c_strPPfun = (sfle.readcomment(c_filePPfun) or ["affy::rma"])[0]

# --- Metadata tables in the parent directory ---------------------------------
c_fileExpTable = sfle.d(pE, "../" + c_strID + "_exp_metadata.txt")
c_fileCondTable = sfle.d(pE, "../" + c_strID + "_cond_metadata.txt")

Import("hashArgs")
import metadata import glob from subprocess import call as ex c_aiCOL = [0] c_iSkip = 2 c_iCOL = len(c_aiCOL) c_fileIDNormPCL = sfle.d(pE, c_strID + "_01norm.pcl") c_fileIDPCL = sfle.d(pE, c_strID + ".pcl") c_fileIDDAB = sfle.d(pE, c_strID + ".dab") c_fileIDQUANT = sfle.d(pE, c_strID + ".quant") c_fileIDPKL = sfle.d(pE, c_strID + ".pkl") c_fileStatus = sfle.d(pE, "status.txt") c_fileIDMap = sfle.d(pE, c_strID + ".map") c_fileIDMapRaw = sfle.d(pE, c_strID + "_raw.map") c_strDirManMap = sfle.d(arepa.path_repo(), sfle.c_strDirEtc, "manual_mapping") c_fileIDMappedPCL = sfle.d(pE, c_strID + "_00mapped.pcl") c_fileIDMappedPCL2 = sfle.d(pE, c_strID + "_01mapped.pcl") c_fileFlagSleipnir = sfle.d(pE, arepa.path_repo(), sfle.c_strDirEtc, "sleipnir") #Load GeneMapper SConscript exec(compile(open(arepa.genemapper()).read(), arepa.genemapper(), 'exec')) #- Normalize def funcIDNormPCL(target, source, env, iMaxLines=100000): strT, astrSs = sfle.ts(target, source) strS = astrSs[1] if (
substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import arepa import os import sfle import sys c_afileSConscripts = sorted( Glob(sfle.d(arepa.path_repo(), sfle.c_strDirSrc, "SConscript*"))) if os.path.isfile("SConscript"): exec(compile(open("SConscript").read(), "SConscript", 'exec')) else: hashArgs = {} # I tried very hard to do this using import, but I can't find a way to prematurely # halt an import without sys.exit, which kills the entire process. for fileSConscript in c_afileSConscripts: strSConscript = fileSConscript.get_abspath() hashEnv = {"test": lambda *a: False, "testing": True} try: exec(compile(open(strSConscript).read(), strSConscript, 'exec'), hashEnv) except SystemExit: