def createTrackInDir(dirName):
    '''Submit one createTrack.sh cluster job per .mapped file under dirName.

    Every Q function has a corresponding shell script; this one makes a wig
    file for all mapped files, for all organisms.

    For each file returned by cg.recurseDir(dirName, end = '.mapped') the
    organism is looked up in the meta file named by Main.conf
    ('metaFileName'), keyed on the part of the base file name before the
    first '.'.  Field 1 of the meta entry is used as the organism.  Files
    that are missing from the meta file, or whose organism is 'NONE', are
    reported and skipped.  All other files are handed to qsub as
    "<wrapperShell> <file> <organism>", with stdout/stderr going to the
    'outLog' / 'errorLog' paths from Main.conf.

    Submission is throttled: a job is only submitted while
    clusterCheck.queryNumJobsQ('chrisgre') is below maxQueuedJobs;
    otherwise the function sleeps and asks again.

    dirName -- directory that is searched for '.mapped' files.

    Returns None.
    '''
    wrapperShell = '/home/chrisgre/scripts/mapping/createTrack.sh'
    maxQueuedJobs = 1000  # submit only while the reported job count is below this
    pollSeconds = 20  # pause between two queue checks when the queue is full

    mainConf = c.cgConfig('Main.conf')
    metaFileName = mainConf.conf['metaFileName']

    # The meta file does not depend on the file being processed, so read it
    # once instead of once per mapped file.
    metaDict = cg.getMetaFileDict(metaFileName)

    for mappedFile in cg.recurseDir(dirName, end = '.mapped'):

        # check if mouse or human
        baseFName = cg.getBaseFileName(mappedFile).split('.')[0]

        # NOTE: each print below takes a single parenthesised argument so
        # that it writes the same text under Python 2 and Python 3.
        org = 'None'
        if baseFName in metaDict:
            org = metaDict[baseFName][1]
            if org == 'NONE':
                print(' NO ORG KNOWN FOR %s' % mappedFile)
                continue
            print(' USING ORG %s %s' % (org, mappedFile))

        # check if there is an organism, must check due to files not in metaFile
        if org == 'None':
            print(' NO org (not in meta file) %s' % mappedFile)
            continue

        # Wait until the queue has room for another job.
        # NOTE(review): presumably queryNumJobsQ returns the number of jobs
        # the given user currently has queued -- confirm in clusterCheck.
        while clusterCheck.queryNumJobsQ('chrisgre') >= maxQueuedJobs:
            time.sleep(pollSeconds)

        # qsub is started without waiting for it to finish.  An earlier
        # variant of this call also passed '-q', 'xiao', '-l', 'mem=4G'.
        subprocess.Popen(['qsub', '-V', '-o', mainConf.conf['outLog'], '-e', mainConf.conf['errorLog'], wrapperShell, mappedFile, org ])
#how much density for tissue? import cgConfig import bioLibCG as cg mainConf = cgConfig.cgConfig('Main.conf') conf = cgConfig.cgConfig() predFile = open(conf.conf['resultsSorted'], 'r') #must be sorted!!! mirIDs = [x.strip().split('\t')[0].split('.')[0] for x in predFile] #make lib:tissue dict metaDict = cg.getMetaFileDict(mainConf.conf['metaFileName']) #lib is getBaseFileName(key, naked = True): tisue is metaDict[key][3] #make lib:hits dict densityFile = open('/home/chrisgre/scripts/readDensity/individual.densities.data', 'r') tissueHits = {} cID = 'NONE' for line in densityFile: if line.startswith('\t'): #lib: hit l = line.strip().split('\t')[0] hits = int(line.strip().split('\t')[1]) tissueHits[cID][l] = hits else: cID = line.strip() tissueHits[cID] = {} tissueHist = {}#tissue: hits for mirID in mirIDs:
# Script fragment: restricts the small-RNA analysis file to the libraries of
# the configured organism while walking it prediction by prediction.
import bioLibCG as cg
import cgConfig

# init
conf = cgConfig.cgConfig()
mainConf = cgConfig.cgConfig('Main.conf')
pFileName = conf.conf['results']
smallFileName = conf.conf['smallAnalysis']
organism = conf.conf['organism']

# make list of acceptable files for organism
# NOTE: THESE ARE THE NAKED BASE FILE NAMES!
# A key of the meta dict is accepted when field 1 of its entry equals the
# configured organism.
organismFileList = []
metaDict = cg.getMetaFileDict(mainConf.conf['metaFileName'])
for baseFName in metaDict:
    if metaDict[baseFName][1] == organism:
        organismFileList.append(baseFName)

# put small hits for each prediction in dictionary
pCount = {}
smallFile = open(smallFileName, 'r')
currID = 'NONE'
for line in smallFile:
    if '\t' not in line:
        # This is the line with the id in it --> store another ID
        currID = line.strip()
    else:
        # this line contains library and count info ("<lib>\t<count>") --> add
        lib = line.strip().split('\t')[0]
        count = int(line.strip().split('\t')[1])
        # only libraries that belong to the configured organism are considered
        if cg.getBaseFileName(lib, naked=True) in organismFileList:
            # NOTE(review): the body of this branch is not present in this fragment.
            if currID in pCount:
def createMultiTrack(dirName, organism):
    '''merge all mapped tracks in directory and create a single wig file

    Every '.mapped' file found by cg.recurseDir(dirName, end = '.mapped')
    whose meta-file organism (field 1 of its meta entry) equals `organism`
    is read with HTSeq.BowtieReader; each aligned read adds 1 over its
    genomic interval to one stranded coverage array.  The array is written
    as two bedgraph files, one per strand:

        <dirName>/Merge.<organism>.1.wig    ("+" strand)
        <dirName>/Merge.<organism>.-1.wig   ("-" strand)

    Both files are then passed to updateWigLength (together with the
    assembly name) and to cgSort.wigSort.

    dirName  -- directory searched for '.mapped' files; the output files
                are written into it as well.
    organism -- 'human', 'mouse' or 'zebrafish'.

    Raises ValueError for any other organism.  This is checked before any
    file is read; previously an unsupported organism only failed with a
    NameError after all mapped files had already been scanned.

    Returns None.
    '''
    # Resolve chromosome list and assembly first so that an unsupported
    # organism fails immediately.
    if organism == 'human':
        chroms = cg.humanChromosomes
        assembly = 'hg19'
    elif organism == 'mouse':
        chroms = cg.mouseChromosomes
        assembly = 'mm9'
    elif organism == 'zebrafish':
        chroms = cg.zebrafishChromosomes
        assembly = 'danRer6'
    else:
        raise ValueError('unsupported organism: %r' % (organism,))

    mainConf = c.cgConfig('Main.conf')
    metaFileName = mainConf.conf['metaFileName']

    # The meta file does not depend on the file being processed, so read it
    # once instead of once per mapped file.
    metaDict = cg.getMetaFileDict(metaFileName)

    # NOTE: each print below takes a single parenthesised argument so that
    # it writes the same text under Python 2 and Python 3.
    fileList = []
    for mappedFile in cg.recurseDir(dirName, end = '.mapped'):

        baseFName = cg.getBaseFileName(mappedFile, naked= True)

        # must check due to files not in metaFile
        if baseFName not in metaDict:
            print(' NO org (not in meta file) %s' % mappedFile)
            continue

        org = metaDict[baseFName][1]
        if org == 'NONE':
            print(' NO ORG KNOWN FOR %s' % mappedFile)
            continue

        # only make wig file for organism asked for
        if org != organism:
            print(' NOT ORGANISM RUNNING %s' % mappedFile)
            continue

        # right organism and has mapped file --> add
        print(' USING ORG %s %s' % (org, mappedFile))
        fileList.append(mappedFile)

    # make merged wig
    print('Making Bed File vectors')
    cvg = HTSeq.GenomicArray(chroms, stranded=True, typecode='i')
    for fName in fileList:
        alignment_file = HTSeq.BowtieReader(fName)
        for alngt in alignment_file:
            if alngt.aligned:
                cvg.add_value( 1, alngt.iv ) #iv is the genomic interval..

    bedNamePos = dirName + '/Merge.' + organism + '.1.wig'
    bedNameNeg = dirName + '/Merge.' + organism + '.-1.wig'

    print('Writing Bed File')
    cvg.write_bedgraph_file(bedNamePos, "+" )
    cvg.write_bedgraph_file(bedNameNeg, "-" )

    #Now extend it
    updateWigLength(bedNamePos, assembly)
    updateWigLength(bedNameNeg, assembly)

    #Now Sort it.
    cgSort.wigSort(bedNamePos)
    cgSort.wigSort(bedNameNeg)