# Example #1
# 0
def GetDistribListForDirectory(directory, filesToFind):
	"""Build one dinucleotide distribution per matching sequence file.

	Every path yielded by SeqGenUtils.findFiles(directory, filesToFind) is
	loaded with shuffle_utils.get_seqs, and the sequences it returns are
	passed to shuffle_utils.compute_dinuc_distrib(sequences, True).

	Args:
		directory: Forwarded unchanged to SeqGenUtils.findFiles.
		filesToFind: Forwarded unchanged to SeqGenUtils.findFiles
			(presumably a file-name pattern -- confirm against that helper).

	Returns:
		A list with one compute_dinuc_distrib result per file, in the order
		findFiles produced the files. Empty when nothing matched.
	"""
	distributions = []
	for path in SeqGenUtils.findFiles(directory, filesToFind):
		# get_seqs returns three values; only the sequences are needed here.
		sequences, _gc_values, _lengths = shuffle_utils.get_seqs(path)
		distributions.append(shuffle_utils.compute_dinuc_distrib(sequences, True))
	return distributions
def parseSubDirectories(resultDir, level=1):
	"""Write per-experiment GC-content mean and variance for resultDir.

	Every file returned by SeqGenUtils.findFiles(resultDir, "Signal*.fa") is
	passed to gcContent.getNucleotideComposition. The GC values are grouped
	by an experiment name taken from the file's directory path, and for each
	group [mean, variance] is computed with numpy. The resulting dict is
	handed to writeDictToFile together with the output path
	"<resultDir>/<last component of resultDir>_GC_Content.out".

	Args:
		resultDir: Directory to search. May be given with or without a
			trailing path separator.
		level: Not used by this function; kept so existing callers that pass
			it keep working.

	Returns:
		None. The results are passed to writeDictToFile and progress is
		printed to standard output.

	Raises:
		IndexError: If the directory of a signal file has fewer than three
			"/"-separated components (see the experiment-name lookup below).
	"""
	# Name the output after the last component of resultDir and place it
	# inside resultDir. The previous string arithmetic
	# (resultDir + resultDir[:-1] + suffix) was only right for a
	# single-component directory ending in a separator.
	dirLabel = os.path.basename(os.path.normpath(resultDir))
	if dirLabel == os.curdir:
		# normpath("") and normpath(".") both give "."; use no prefix then.
		dirLabel = ""
	resultFileName = os.path.join(resultDir, dirLabel + "_GC_Content.out")

	# Each print call takes one pre-formatted string, which produces the
	# same text under the Python 2 print statement and the Python 3 function.
	print("Result FileName:  %s" % (resultFileName,))

	gcContentMap = {}
	for signalFile in SeqGenUtils.findFiles(resultDir, "Signal*.fa"):
		print("Signal File:  %s" % (signalFile,))
		# NOTE(review): the experiment name is the third "/"-separated
		# component of the file's directory, so this depends on how deep
		# resultDir itself is -- confirm against the expected layout.
		expt_name = os.path.dirname(signalFile).split("/")[2]
		gcContentValue, atContentValue = gcContent.getNucleotideComposition(signalFile)
		print("%s %s" % (gcContentValue, atContentValue))
		gcContentMap.setdefault(str(expt_name), []).append(gcContentValue)

	# The second entry is the variance (np.var), not the standard deviation.
	gcContentMeanVar = {}
	for exptName, gcValues in gcContentMap.items():
		gcContentMeanVar[exptName] = [np.mean(gcValues), np.var(gcValues)]

	writeDictToFile(gcContentMeanVar, resultFileName)