Exemplo n.º 1
0
def main(options, programName, programVersion):
	"""Join the experiments listed in a tag file into merged data/relations files.

	Registers the command-line options on ``options``, parses them, works out
	the location of every input and output file, delegates the actual work to
	``readTagFile`` and ``processFiles`` and finally saves the log file.

	Arguments:
		options -- parser object exposing ``add_argument`` and ``parse_args``
			(presumably an ``argparse.ArgumentParser``; confirm with the caller).
		programName -- program name; written in the log header and, in lower
			case, used as fallback analysis name.
		programVersion -- version string written next to the name in the log.
	"""

	## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION

	defaultAnalysisName = programName.lower()

	# default extension and base names used to build the output file names
	defaultTextExtension = ".txt"
	defaultLogFile = "logFile"
	defaultNewRelsFile = "newRels"
	defaultNewDataFile = "newData"

	# basic log file
	logList = [[programName + " " + programVersion], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	# parsing arguments from commandline
	options.add_argument("-a", "--analysis", type = str, default = "", required = True, help = "Use a prefix for the output files.")
	options.add_argument("-p", "--place", type = str, default = "", required = True, help = "To use a different common folder for the output files. If this is not provided, the folder used will be the same as the FASTA file folder.")
	options.add_argument("-L", "--logfile", type = str, default = "", required = False, help = "To use a non-default name for the log file.")
	options.add_argument("-t", "--tagfile", type = str, default = "", required = True, help = "The file containing the tags used for the different experiments to be joined.")
	options.add_argument("-d", "--datafile", type = str, default = "", required = False, help = "To use a non-default merged data file name.")
	options.add_argument("-r", "--relfile", type = str, default = "", required = False, help = "To use a non-default merged relations file name.")
	options.add_argument("-s", "--separator", type = str, default = "_", required = False, help = """To use a non-default suffix separator (default is "_").""")
	options.add_argument("-v", "--verbose", action = "store_true", help = "To write down extra information about operations performed.")

	# TODO: add easter egg

	arguments = options.parse_args()

	# every string option defaults to "", so the parsed values can be used
	# directly; only an empty separator falls back to "_"
	analysisName = arguments.analysis
	analysisFolder = arguments.place
	logFile = arguments.logfile
	tagFile = arguments.tagfile
	newDataFile = arguments.datafile
	newRelsFile = arguments.relfile
	separator = arguments.separator or "_"
	verbose = arguments.verbose

	## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
	## **********************************************************
	## REGION: FILE NAMES SETUP

	if not analysisName:
		if tagFile:
			analysisName = os.path.splitext(os.path.basename(tagFile))[0]
		else:
			analysisName = defaultAnalysisName

	# an analysis name given as a path also provides the folder, unless a
	# folder was given explicitly
	analysisNameFirstPart = os.path.dirname(analysisName)
	if analysisNameFirstPart:
		analysisName = os.path.basename(analysisName)
		if not analysisFolder:
			analysisFolder = analysisNameFirstPart

	def placeInFolder(fileName, defaultBaseName):
		# Empty names get "<analysis>_<default>.txt"; names without a folder
		# part are placed in the analysis folder.
		if not fileName:
			fileName = os.path.join(analysisFolder, analysisName + "_" + defaultBaseName + defaultTextExtension)
		if not os.path.dirname(fileName) and os.path.basename(fileName):
			fileName = os.path.join(analysisFolder, fileName)
		return fileName

	# input
	if not os.path.dirname(tagFile):
		tagFile = os.path.join(analysisFolder, tagFile)

	# output
	logFile = placeInFolder(logFile, defaultLogFile)
	newRelsFile = placeInFolder(newRelsFile, defaultNewRelsFile)
	newDataFile = placeInFolder(newDataFile, defaultNewDataFile)

	logList.append([""])
	logList.append(["Input tags file: " + tagFile])
	logList.append(["Output new data file: " + newDataFile])
	logList.append(["Output new relations file: " + newRelsFile])
	logList.append(["Output log file: " + logFile])
	logList.append([""])

	## END REGION: FILE NAMES SETUP
	## **********************************************************
	## REGION: PROGRAM BASIC STRUCTURE

	tagList, dataFileList = readTagFile(tagFile, defaultFolder = analysisFolder)

	processMessage = processFiles(tagList,
									dataFileList,
									verbose = verbose,
									separator = separator,
									dataFile = newDataFile,
									relsFile = newRelsFile)

	logList.extend(processMessage)

	## END REGION: PROGRAM BASIC STRUCTURE
	## **********************************************************
	## REGION: SAVING FILES

	## exceptionally, due to memory errors, the data are read and written in
	## processFiles, so only the log file is saved here
	logList.append(["Looks like everything went fine."])
	stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 2
0
def showGraphTool(inputRawData,
                  inputRelations,
                  k,
                  variance,
                  alpha,
                  medianSide,
                  verbose=False,
                  showRank=False,
                  graphFile=None,
                  graphData=None,
                  dpi=None,
                  showGraph=True):
    """Plot the MAD distribution (green dots) against the inverse of the fit
    (red line).

    Args:
        inputRawData: raw data rows; sorted IN PLACE by this function.
        inputRelations: relation rows; sorted IN PLACE by this function.
        k, variance, alpha: parameters forwarded to getNextIdX_klibrate,
            getMADDistribution and getInverseOfFit, and shown in the title.
        medianSide: half width of the median window (window = 2 * side + 1).
        verbose: accepted for interface compatibility; not used here.
        showRank: if True the x axis is the rank (position) of each point,
            otherwise the weight of each point.
        graphFile: if set, the figure is saved to this path.
        graphData: if set (and showRank is True), the plotted table is saved
            to this path with stats.saveFile.
        dpi: resolution passed to plt.savefig.
        showGraph: if True the figure is shown with plt.show().

    Exits the program with status 1 when there are fewer input rows than the
    median window width.
    """

    plt.clf()
    inputRawData.sort()
    inputRelations.sort()

    windowWidth = medianSide * 2 + 1
    if len(inputRawData) < windowWidth:
        print('Error: window for median is bigger than total input size')
        sys.exit(1)

    nextIdXData = getNextIdX_klibrate(inputRawData,
                                      inputRelations,
                                      k,
                                      variance,
                                      alpha,
                                      giveMergedData=True)
    nextIdX = nextIdXData[0]
    mergedData = nextIdXData[1]

    MADdistrOut, weights = getMADDistribution(nextIdX, mergedData, k, variance,
                                              alpha, medianSide)
    invOfFitOut = getInverseOfFit(mergedData, k, variance, alpha)

    # Discard the borders, where the median window is incomplete.
    # NOTE(review): because of the "+ 1" this drops medianSide items at the
    # front but only medianSide - 1 at the back -- confirm this is intended.
    MADdistrOut = MADdistrOut[medianSide:len(MADdistrOut) - medianSide + 1]
    invOfFitOut = invOfFitOut[medianSide:len(invOfFitOut) - medianSide + 1]
    weights = weights[medianSide:len(weights) - medianSide + 1]

    # Both modes draw the same two series; only the x values and the x label
    # differ.
    if showRank:
        xForMAD = list(range(len(MADdistrOut)))
        xForFit = list(range(len(invOfFitOut)))
        xLabel = 'rank($V_s$)'
    else:
        xForMAD = weights
        xForFit = weights
        xLabel = '($V_s$)'

    plt.plot(xForMAD,
             MADdistrOut,
             'g.',
             xForFit,
             invOfFitOut,
             'r',
             linewidth=1.0,
             markersize=2.0,
             markeredgewidth=0.0)
    plt.xlabel(xLabel)
    plt.ylabel('1 / MSD')

    # The table is only built when it is actually going to be written.
    # TODO: use a better filename
    if showRank and graphData:
        dataToSave = [[rank, weights[rank], madValue, invOfFitOut[rank]]
                      for rank, madValue in enumerate(MADdistrOut)]
        stats.saveFile(graphData, dataToSave,
                       "rank(Vs)\tweight\tMAD\t1/fit")

    plt.grid(True)
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    plt.title(r'k = %g, $\sigma^2$ = %g, alpha = %g' % (k, variance, alpha))

    if graphFile:
        plt.savefig(graphFile, dpi=dpi)

    if showGraph:
        plt.show()
Exemplo n.º 3
0
def main(argv):
    """Run Klibrate from the command line.

    Parses ``argv`` with ``getopt``, derives every input/output path from the
    analysis name and folder, runs ``calibrate`` and writes the calibrated
    table and the info (log) file.

    Args:
        argv: argument list as expected by ``getopt.getopt`` (typically
            ``sys.argv[1:]``).

    Exits with status 2 when the options cannot be parsed. Exits with the
    default status after printing the help (also when no option is given),
    when K or the variance cannot be read from their files, and when
    ``calibrate`` returns no data.
    """

    version = "v1.19"
    verbose = True
    showGraph = True
    graphDPI = 100  # default of Matplotlib's savefig method
    showSteps = True
    forceParameters = False
    kSeed = 1.0
    varianceSeed = 0.001
    alphaSeed = 1.0
    useCooperativity = False
    medianSide = 100
    maxIterations = 0
    dataFile = ""
    relationsFile = ""
    outputCalibrated = ""
    infoFile = ""
    kFile = ""
    kSeedProvided = False
    varFile = ""
    varianceSeedProvided = False
    graphFileVRank = ""
    graphFileVValue = ""
    graphDataFile = ""
    showRank = False
    analysisName = ""
    defaultAnalysisName = "klibrate"
    analysisFolder = ""
    logList = [["Klibrate " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultOutputInfo = "infoFile"
    defaultOutputGraphVRank = "outGraph_VRank"
    defaultOutputGraphVValue = "outGraph_VValue"
    defaultGraphDataFile = "outGraph_Data"
    defaultOutputCalibrated = "calibrated"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"

    shortOptions = "a:p:k:v:c:d:r:o:w:m:L:G:D:R:K:V:bgsfh"
    # "place=" and "var=" are aliases tested in the loop below; they must be
    # registered here or getopt rejects them before the loop is reached.
    # NOTE(review): "--relationsfile" is tested below too but is NOT
    # registered, because registering it would make the abbreviation "--rel"
    # (currently resolved to "--relfile") ambiguous.
    longOptions = [
        "analysis=", "folder=", "place=", "kseed=", "varianceseed=", "var=",
        "alphaseed=", "datafile=", "relfile=", "outputfile=",
        "window=", "maxiterations=", "infofile=", "outgraphvrank=",
        "outgraphvvalue=", "outgraphdata=", "kfile=", "varfile=",
        "no-verbose", "no-showgraph", "no-showsteps",
        "forceparameters", "showrank", "help", "egg", "easteregg"
    ]

    try:
        opts, _ = getopt.getopt(argv, shortOptions, longOptions)
    except getopt.GetoptError:
        # the info file location is not known yet, so nothing is saved
        print("Error while getting parameters.")
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    # NOTE(review): "--showrank" is accepted by getopt but has no branch
    # here, so showRank always stays False -- confirm whether that is wanted.
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-k", "--kseed"):
            kSeed = float(arg)
            kSeedProvided = True
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-c", "--alphaseed"):
            useCooperativity = True
            alphaSeed = float(arg)
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-o", "--outputfile"):
            outputCalibrated = arg
        elif opt in ("-w", "--window"):
            # the window must be odd; the median side is half of the rest
            windowWidth = round(float(arg))
            if windowWidth % 2 == 0:
                windowWidth += 1
            medianSide = int((windowWidth - 1) / 2)
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt in ("-g", "--no-showgraph"):
            showGraph = False
        elif opt in ("-s", "--no-showsteps"):
            showSteps = False
        elif opt in ("-m", "--maxiterations"):
            maxIterations = int(arg)
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-G", "--outgraphvvalue"):
            graphFileVValue = arg
        elif opt in ("-D", "--outgraphdata"):
            graphDataFile = arg
        elif opt in ("-K", "--kfile"):
            kFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-f", "--forceparameters"):
            forceParameters = True
        elif opt in ("-R", "--outgraphvrank"):
            graphFileVRank = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()

    verbose = verbose or showSteps

    # REGION: FILE NAMES SETUP

    if not analysisName:
        if dataFile:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given as a path also provides the folder
    analysisNameFirstPart = os.path.dirname(analysisName)
    if analysisNameFirstPart:
        analysisName = os.path.basename(analysisName)
        if not analysisFolder:
            analysisFolder = analysisNameFirstPart

    # otherwise the folder of the data file is used
    if dataFile and not analysisFolder and os.path.dirname(dataFile):
        analysisFolder = os.path.dirname(dataFile)

    # The info file is resolved first: the error paths below write the log
    # to it and must not be handed an empty file name.
    if not infoFile:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    # input
    if not dataFile:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)

    if not os.path.dirname(dataFile) and analysisFolder:
        dataFile = os.path.join(analysisFolder, dataFile)

    if not relationsFile:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)

    if not os.path.dirname(relationsFile):
        relationsFile = os.path.join(analysisFolder, relationsFile)

    if not os.path.dirname(varFile) and os.path.basename(varFile):
        varFile = os.path.join(analysisFolder, varFile)

    # a seed given on the command line wins over the one in the file
    if varFile and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file"])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    if not os.path.dirname(kFile) and os.path.basename(kFile):
        kFile = os.path.join(analysisFolder, kFile)

    if kFile and not kSeedProvided:
        kSeed, KOk = stats.extractKFromKFile(kFile,
                                             verbose=verbose,
                                             defaultSeed=kSeed)
        if not KOk:
            logList.append(["K not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    # output
    if not outputCalibrated:
        outputCalibrated = os.path.join(
            analysisFolder, analysisName + "_" + defaultOutputCalibrated +
            defaultTableExtension)
    elif not os.path.dirname(outputCalibrated):
        outputCalibrated = os.path.join(analysisFolder, outputCalibrated)

    if not graphFileVRank:
        graphFileVRank = os.path.join(
            analysisFolder, analysisName + "_" + defaultOutputGraphVRank +
            defaultGraphExtension)

    if not graphFileVValue:
        graphFileVValue = os.path.join(
            analysisFolder, analysisName + "_" + defaultOutputGraphVValue +
            defaultGraphExtension)

    if not graphDataFile:
        graphDataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultGraphDataFile + defaultTableExtension)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["K seed = " + str(kSeed)])
    if useCooperativity:
        logList.append(["Alpha seed = " + str(alphaSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Output calibrated file: " + outputCalibrated])
    logList.append(["Output info file: " + infoFile])
    logList.append(["Output V rank graph file: " + graphFileVRank])
    logList.append(["Output V graph file: " + graphFileVValue])
    logList.append(["Output data file for graph: " + graphDataFile])
    logList.append(["Parameters forced: " + str(forceParameters)])
    logList.append(["Max iterations: " + str(maxIterations)])

    # END REGION: FILE NAMES SETUP

    calibratedData, variance, k, alpha, extraLog = calibrate(
        rawDataFile=dataFile,
        relationsFile=relationsFile,
        kSeed=kSeed,
        varianceSeed=varianceSeed,
        medianSide=medianSide,
        maxIterations=maxIterations,
        verbose=showSteps,
        showGraph=showGraph,
        forceParameters=forceParameters,
        alphaSeed=alphaSeed,
        showRank=showRank,
        useCooperativity=useCooperativity,
        graphFileVRank=graphFileVRank,
        graphFileVValue=graphFileVValue,
        graphDataFile=graphDataFile,
        graphDPI=graphDPI)

    logList.extend(extraLog)

    # no calibrated data: keep whatever calibrate logged and stop
    if not calibratedData:
        stats.saveFile(infoFile, logList, "INFO FILE")
        sys.exit()

    stats.saveFile(outputCalibrated, calibratedData, "id\tX\tVcal")

    logList.append([])
    logList.append(["K = " + str(k)])
    logList.append(["Variance = " + str(variance)])
    if useCooperativity:
        logList.append(["Alpha = " + str(alpha)])

    stats.saveFile(infoFile, logList, "INFO FILE")

    if verbose:
        print()
        print("*** results ***")
        print("k = " + str(k))
        print("variance = " + str(variance))
        if useCooperativity:
            print("alpha = " + str(alpha))
        print()
        print("Output calibrated file in: " + outputCalibrated)
        print("Graph with rank of V in: " + graphFileVRank)
        print("Graph with value of V in: " + graphFileVValue)
        print("Info file in: " + infoFile)
Exemplo n.º 4
0
def main(argv):
	"""Run Klibrate from the command line.

	Parses ``argv`` with ``getopt``, derives every input/output path from the
	analysis name and folder, runs ``calibrate`` and writes the calibrated
	table and the info (log) file.

	Arguments:
		argv -- argument list as expected by ``getopt.getopt`` (typically
			``sys.argv[1:]``).

	Exits with status 2 when the options cannot be parsed. Exits with the
	default status after printing the help (also when no option is given),
	when K or the variance cannot be read from their files, and when
	``calibrate`` returns no data.
	"""

	version = "v1.18"
	verbose = True
	showGraph = True
	graphDPI = 100 # default of Matplotlib's savefig method
	showSteps = True
	forceParameters = False
	kSeed = 1.0
	varianceSeed = 0.001
	alphaSeed = 1.0
	useCooperativity = False
	medianSide = 100
	maxIterations = 0
	dataFile = ""
	relationsFile = ""
	outputCalibrated = ""
	infoFile = ""
	kFile = ""
	kSeedProvided = False
	varFile = ""
	varianceSeedProvided = False
	graphFileVRank = ""
	graphFileVValue = ""
	graphDataFile = ""
	showRank = False
	analysisName = ""
	defaultAnalysisName = "klibrate"
	analysisFolder = ""
	logList = [["Klibrate " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
	defaultDataFile = "data"
	defaultRelationsFile = "rels"
	defaultOutputInfo = "infoFile"
	defaultOutputGraphVRank = "outGraph_VRank"
	defaultOutputGraphVValue = "outGraph_VValue"
	defaultGraphDataFile = "outGraph_Data"
	defaultOutputCalibrated = "calibrated"
	defaultTableExtension = ".tsv"
	defaultTextExtension = ".txt"
	defaultGraphExtension = ".png"

	shortOptions = "a:p:k:v:c:d:r:o:w:m:L:G:D:R:K:V:bgsfh"
	# "place=" and "var=" are aliases tested in the loop below; they must be
	# registered here or getopt rejects them before the loop is reached.
	# NOTE(review): "--relationsfile" is tested below too but is NOT
	# registered, because registering it would make the abbreviation "--rel"
	# (currently resolved to "--relfile") ambiguous.
	longOptions = [
		"analysis=", "folder=", "place=", "kseed=", "varianceseed=", "var=",
		"alphaseed=", "datafile=", "relfile=", "outputfile=",
		"window=", "maxiterations=", "infofile=", "outgraphvrank=",
		"outgraphvvalue=", "outgraphdata=", "kfile=", "varfile=",
		"no-verbose", "no-showgraph", "no-showsteps",
		"forceparameters", "showrank", "help", "egg", "easteregg",
	]

	try:
		opts, _ = getopt.getopt(argv, shortOptions, longOptions)
	except getopt.GetoptError:
		# the info file location is not known yet, so nothing is saved
		print("Error while getting parameters.")
		sys.exit(2)

	if len(opts) == 0:
		printHelp(version)
		sys.exit()

	# NOTE(review): "--showrank" is accepted by getopt but has no branch
	# here, so showRank always stays False -- confirm whether that is wanted.
	for opt, arg in opts:
		if opt in ("-a", "--analysis"):
			analysisName = arg
		elif opt in ("-p", "--place", "--folder"):
			analysisFolder = arg
		elif opt in ("-k", "--kseed"):
			kSeed = float(arg)
			kSeedProvided = True
		elif opt in ("-v", "--var", "--varianceseed"):
			varianceSeed = float(arg)
			varianceSeedProvided = True
		elif opt in ("-c", "--alphaseed"):
			useCooperativity = True
			alphaSeed = float(arg)
		elif opt in ("-d", "--datafile"):
			dataFile = arg
		elif opt in ("-r", "--relfile", "--relationsfile"):
			relationsFile = arg
		elif opt in ("-o", "--outputfile"):
			outputCalibrated = arg
		elif opt in ("-w", "--window"):
			# the window must be odd; the median side is half of the rest
			windowWidth = round(float(arg))
			if windowWidth % 2 == 0:
				windowWidth += 1
			medianSide = int((windowWidth - 1) / 2)
		elif opt in ("-b", "--no-verbose"):
			verbose = False
		elif opt in ("-g", "--no-showgraph"):
			showGraph = False
		elif opt in ("-s", "--no-showsteps"):
			showSteps = False
		elif opt in ("-m", "--maxiterations"):
			maxIterations = int(arg)
		elif opt in ("-L", "--infofile"):
			infoFile = arg
		elif opt in ("-G", "--outgraphvvalue"):
			graphFileVValue = arg
		elif opt in ("-D", "--outgraphdata"):
			graphDataFile = arg
		elif opt in ("-K", "--kfile"):
			kFile = arg
		elif opt in ("-V", "--varfile"):
			varFile = arg
		elif opt in ("-f", "--forceparameters"):
			forceParameters = True
		elif opt in ("-R", "--outgraphvrank"):
			graphFileVRank = arg
		elif opt in ("-h", "--help"):
			printHelp(version)
			sys.exit()
		elif opt in ("--egg", "--easteregg"):
			easterEgg()
			sys.exit()

	verbose = verbose or showSteps

	# REGION: FILE NAMES SETUP

	if not analysisName:
		if dataFile:
			analysisName = os.path.splitext(os.path.basename(dataFile))[0]
		else:
			analysisName = defaultAnalysisName

	# an analysis name given as a path also provides the folder
	analysisNameFirstPart = os.path.dirname(analysisName)
	if analysisNameFirstPart:
		analysisName = os.path.basename(analysisName)
		if not analysisFolder:
			analysisFolder = analysisNameFirstPart

	# otherwise the folder of the data file is used
	if dataFile and not analysisFolder and os.path.dirname(dataFile):
		analysisFolder = os.path.dirname(dataFile)

	# The info file is resolved first: the error paths below write the log
	# to it and must not be handed an empty file name.
	if not infoFile:
		infoFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputInfo + defaultTextExtension)

	# input
	if not dataFile:
		dataFile = os.path.join(analysisFolder, analysisName + "_" + defaultDataFile + defaultTableExtension)

	if not os.path.dirname(dataFile) and analysisFolder:
		dataFile = os.path.join(analysisFolder, dataFile)

	if not relationsFile:
		relationsFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelationsFile + defaultTableExtension)

	if not os.path.dirname(relationsFile):
		relationsFile = os.path.join(analysisFolder, relationsFile)

	if not os.path.dirname(varFile) and os.path.basename(varFile):
		varFile = os.path.join(analysisFolder, varFile)

	# a seed given on the command line wins over the one in the file
	if varFile and not varianceSeedProvided:
		varianceSeed, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = verbose, defaultSeed = varianceSeed)
		if not varianceOk:
			logList.append(["Variance not found in text file"])
			stats.saveFile(infoFile, logList, "INFO FILE")
			sys.exit()

	if not os.path.dirname(kFile) and os.path.basename(kFile):
		kFile = os.path.join(analysisFolder, kFile)

	if kFile and not kSeedProvided:
		kSeed, KOk = stats.extractKFromKFile(kFile, verbose = verbose, defaultSeed = kSeed)
		if not KOk:
			logList.append(["K not found in text file."])
			stats.saveFile(infoFile, logList, "INFO FILE")
			sys.exit()

	# output
	if not outputCalibrated:
		outputCalibrated = os.path.join(analysisFolder, analysisName + "_" + defaultOutputCalibrated + defaultTableExtension)
	elif not os.path.dirname(outputCalibrated):
		outputCalibrated = os.path.join(analysisFolder, outputCalibrated)

	if not graphFileVRank:
		graphFileVRank = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraphVRank + defaultGraphExtension)

	if not graphFileVValue:
		graphFileVValue = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraphVValue + defaultGraphExtension)

	if not graphDataFile:
		graphDataFile = os.path.join(analysisFolder, analysisName + "_" + defaultGraphDataFile + defaultTableExtension)

	logList.append(["Variance seed = " + str(varianceSeed)])
	logList.append(["K seed = " + str(kSeed)])
	if useCooperativity:
		logList.append(["Alpha seed = " + str(alphaSeed)])
	logList.append(["Input data file: " + dataFile])
	logList.append(["Input relations file: " + relationsFile])
	logList.append(["Output calibrated file: " + outputCalibrated])
	logList.append(["Output info file: " + infoFile])
	logList.append(["Output V rank graph file: " + graphFileVRank])
	logList.append(["Output V graph file: " + graphFileVValue])
	logList.append(["Output data file for graph: " + graphDataFile])
	logList.append(["Parameters forced: " + str(forceParameters)])
	logList.append(["Max iterations: " + str(maxIterations)])

	# END REGION: FILE NAMES SETUP

	calibratedData, variance, k, alpha, extraLog = calibrate(
		rawDataFile = dataFile,
		relationsFile = relationsFile,
		kSeed = kSeed,
		varianceSeed = varianceSeed,
		medianSide = medianSide,
		maxIterations = maxIterations,
		verbose = showSteps,
		showGraph = showGraph,
		forceParameters = forceParameters,
		alphaSeed = alphaSeed,
		showRank = showRank,
		useCooperativity = useCooperativity,
		graphFileVRank = graphFileVRank,
		graphFileVValue = graphFileVValue,
		graphDataFile = graphDataFile,
		graphDPI = graphDPI)

	logList.extend(extraLog)

	# no calibrated data: keep whatever calibrate logged and stop
	if not calibratedData:
		stats.saveFile(infoFile, logList, "INFO FILE")
		sys.exit()

	stats.saveFile(outputCalibrated, calibratedData, "id\tX\tVcal")

	logList.append([])
	logList.append(["K = " + str(k)])
	logList.append(["Variance = " + str(variance)])
	if useCooperativity:
		logList.append(["Alpha = " + str(alpha)])

	stats.saveFile(infoFile, logList, "INFO FILE")

	if verbose:
		print()
		print("*** results ***")
		print("k = " + str(k))
		print("variance = " + str(variance))
		if useCooperativity:
			print("alpha = " + str(alpha))
		print()
		print("Output calibrated file in: " + outputCalibrated)
		print("Graph with rank of V in: " + graphFileVRank)
		print("Graph with value of V in: " + graphFileVValue)
		print("Info file in: " + infoFile)
Exemplo n.º 5
0
def main(argv):

    version = "v1.06"
    verbose = False
    showGraph = True
    showLegend = True
    analysisName = ""
    graphLimits = 6.0
    defaultAnalysisName = "arbor"
    analysisFolder = ""
    # input files
    inStats = ""
    useSubStats = False  # True if inStats (-z) and relationsFile (-r) are provided
    bigListFile = ""
    defaultStatsFile = "stats"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGVFileExtension = ".gv"
    relationsFile = ""
    listOfCategoriesFile = ""
    # output files
    defaultListOfCategoriesFile = "ulst"
    defaultBigListFile = "table_allPaths.tsv"
    defaultOutputFile = "outNodes"
    defaultOutputGraphFile = "outTree"
    defaultLogFile = "logFile"
    logFile = ""
    outputFile = ""
    graphFile = ""
    similarityMatrixFile = ""
    graphFileFormat = "png"
    altMax = 5

    selectedNodeColour = "#ff9090"
    defaultNodeColour = "#ffff80"
    errorNodeColour = "#8080ff"
    minColour = "#00ff00"
    middleColour = "#ffffff"
    maxColour = "#ff0000"

    logList = [["SanXoTGauss " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    try:
        opts, args = getopt.getopt(argv, "a:p:z:c:d:l:L:b:G:r:g:N:h", [
            "analysis=", "folder=", "place=", "statsfile=", "list=",
            "dotfile=", "graphlimits=", "logfile=", "biglist=", "graphfile=",
            "relfile=", "relationsfile=", "graphformat=", "altmax=",
            "selectednodecolour=", "selectednodecolor=", "defaultnodecolour=",
            "defaultnodecolor=", "errornodecolour=", "errornodecolor=",
            "mincolour=", "mincolor=", "middlecolour=", "middlecolor=",
            "maxcolour=", "maxcolor=", "help", "egg", "easteregg"
        ])
    except getopt.GetoptError:
        message = "Error while getting parameters."
        print message
        logList.append([message])
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-z", "--statsfile"):
            inStats = arg
        elif opt in ("-c", "--list"):  # outList from SanXoTSqueezer
            listOfCategoriesFile = arg
        elif opt in ("-d", "--dotfile"):
            dotFile = float(arg)
        elif opt in ("-l", "--graphlimits"):
            graphLimits = float(arg)
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-d", "--dotfile"):
            outputFile = arg
        elif opt in ("-G", "--graphfile"):
            graphFile = arg
        elif opt in ("-b", "--biglist"):  # table_allPaths.xls from GOconnect
            bigListFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-N", "--altmax"):
            altMax = int(arg)

        elif opt in ("-g", "--graphformat"):
            graphFileFormat = arg.lower().strip()
            if graphFileFormat == "jpeg": graphFileFormat = "jpg"
            if graphFileFormat != "png" and \
             graphFileFormat != "svg" and \
             graphFileFormat != "jpg" and \
             graphFileFormat != "gif":
                print
                print "Warning: graph format \"%s\" is not supported,\npng will be used instead." % graphFileFormat
                print
                graphFileFormat = "png"

        elif opt in ("--selectednodecolour", "--selectednodecolor"):
            selectedNodeColour = arg
        elif opt in ("--defaultnodecolour", "--defaultnodecolor"):
            defaultNodeColour = arg
        elif opt in ("--errornodecolour", "--errornodecolor"):
            errorNodeColour = arg
        elif opt in ("--mincolour", "--mincolor"):
            minColour = arg
        elif opt in ("--middlecolour", "--middlecolor"):
            middleColour = arg
        elif opt in ("--maxcolour", "--maxcolor"):
            maxColour = arg

        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()

    if len(inStats) > 0 and len(relationsFile) > 0:
        useSubStats = True

# REGION: FILE NAMES SETUP

    defaultGraphExtension = "." + graphFileFormat

    if len(analysisName) == 0:
        if len(inStats) > 0:
            analysisName = os.path.splitext(os.path.basename(inStats))[0]
        else:
            analysisName = defaultAnalysisName

    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(inStats) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(inStats)) > 0:
            analysisFolder = os.path.dirname(inStats)

    # input
    if len(inStats) == 0:
        inStats = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultStatsFile + defaultTableExtension)

    if len(os.path.dirname(inStats)) == 0 and len(analysisFolder) > 0:
        inStats = os.path.join(analysisFolder, inStats)

    if len(os.path.dirname(relationsFile)) == 0 and len(analysisFolder) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    if len(listOfCategoriesFile) == 0:
        listOfCategoriesFile = os.path.join(
            analysisFolder, analysisName + "_" + defaultListOfCategoriesFile +
            defaultTextExtension)

    if len(os.path.dirname(listOfCategoriesFile)) == 0 and len(
            os.path.basename(listOfCategoriesFile)) > 0:
        listOfCategoriesFile = os.path.join(analysisFolder,
                                            listOfCategoriesFile)

    if len(bigListFile) == 0:
        bigListFile = defaultBigListFile

    if len(os.path.dirname(bigListFile)) == 0 and len(
            os.path.basename(bigListFile)) > 0:
        bigListFile = os.path.join(analysisFolder, bigListFile)

    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputFile + defaultGVFileExtension)

    if len(graphFile) == 0:
        graphFile = os.path.join(
            analysisFolder, analysisName + "_" + defaultOutputGraphFile +
            defaultGraphExtension)

    if len(logFile) == 0:
        logFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultLogFile + defaultTextExtension)

    if len(os.path.dirname(outputFile)) == 0 and len(
            os.path.basename(outputFile)) > 0:
        outputFile = os.path.join(analysisFolder, outputFile)

    if len(os.path.dirname(graphFile)) == 0 and len(
            os.path.basename(graphFile)) > 0:
        graphFile = os.path.join(analysisFolder, graphFile)

    if len(os.path.dirname(logFile)) == 0 and len(
            os.path.basename(logFile)) > 0:
        logFile = os.path.join(analysisFolder, logFile)

    logList.append([""])
    logList.append(["Input stats file: " + inStats])
    logList.append(["File with categories to check: " + listOfCategoriesFile])
    logList.append(["Output GV table: " + outputFile])
    logList.append(["Output graph table: " + graphFile])
    logList.append(["Output log file: " + logFile])
    logList.append([""])

    # pp.pprint(logList)
    # sys.exit()

    # END REGION: FILE NAMES SETUP

    bigList = stats.load2stringList(bigListFile, removeCommas=True)
    higherElements = stats.load2stringList(listOfCategoriesFile,
                                           removeCommas=True)

    if higherElements[0] == ['id', 'Z', 'n'
                             ] or higherElements[0] == ['id', 'n', 'Z', 'FDR']:
        # this means the list comes from SanXoTSqueezer
        # so the header and the extra columns have to be removed
        higherElements = stats.extractColumns(higherElements[1:], 0)
    else:
        # only removing extra columns and converting list into text
        higherElements = stats.extractColumns(higherElements, 0)

    if useSubStats:
        # añadir salida para log ***
        subData = stats.arrangeSubData(inStats=inStats,
                                       higherElements=higherElements,
                                       relFile=relationsFile,
                                       ignoreNaNsInFDR=False)

    else:
        subData = None

    GVFileText = createGVFileTree(bigList,
                                  higherElements,
                                  subData=subData,
                                  ZLimit=graphLimits,
                                  altMax=altMax,
                                  defaultNodeColour=defaultNodeColour,
                                  errorNodeColour=errorNodeColour,
                                  minColour=minColour,
                                  middleColour=middleColour,
                                  maxColour=maxColour)

    stats.saveTextFile(outputFile, GVFileText)

    createDOTTree(
        outputFile,
        graphFile,
        imageFormat=graphFileFormat,
    )

    stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 6
0
def main(argv):
	"""Command-line entry point of SanXoTSieve.

	Parses the options in `argv`, derives the input and output file names,
	loads the relations and data files and hands them to
	`tagRelationsWithoutOutliers` (default) or, with `--oldway`, to
	`getRelationsWithoutOutliers`. The resulting relations and a log are
	then saved with `stats.saveFile`.

	Args:
		argv: command-line arguments, without the program name.

	The process exits with status 2 when the options cannot be parsed, and
	with the default status after printing help or when no variance could
	be extracted from the variance file.
	"""

	version = "v0.17"
	analysisName = ""
	# Fallback prefix used when neither -a nor -d is supplied. The name was
	# read further down without being defined anywhere in this function.
	# NOTE(review): "sieve" is an assumed value -- confirm the intended default.
	defaultAnalysisName = "sieve"
	analysisFolder = ""
	varianceSeed = 0.001
	FDRLimit = 0.01
	varianceSeedProvided = False
	removeDuplicateUpper = False
	tags = "!out"
	outlierTag = "out"
	logicOperatorsAsWords = False
	dataFile = ""
	relationsFile = ""
	newRelFile = ""
	removedRelFile = ""
	defaultDataFile = "data"
	defaultRelationsFile = "rels"
	defaultTaggedRelFile = "tagged"
	defaultNewRelFile = "cleaned"
	defaultRemovedRelFile = "outliers"
	defaultOutputInfo = "infoFile"
	infoFile = ""
	varFile = ""
	defaultTableExtension = ".tsv"
	defaultTextExtension = ".txt"
	verbose = True
	oldWay = False # instead of tagging outliers, separating relations files, the old way
	modeUsed = mode.onePerHigher
	logList = [["SanXoTSieve " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	# "place=" is declared so that the "--place" alias tested below is
	# actually accepted by getopt
	longOptions = ["analysis=", "folder=", "place=", "varianceseed=",
		"datafile=", "relfile=", "newrelfile=", "outlierrelfile=",
		"infofile=", "varfile=", "fdrlimit=", "one-to-one", "no-verbose",
		"randomise", "removeduplicateupper", "help", "advanced-help",
		"tags=", "outliertag=", "oldway", "word-operators"]

	try:
		opts, args = getopt.getopt(argv, "a:p:v:d:r:n:L:V:f:ubDhH", longOptions)
	except getopt.GetoptError as parseError:
		# no info file name is known at this point, so report on stderr
		# instead of trying to save the log under an empty file name
		sys.stderr.write("Error while getting parameters: %s\n" % parseError)
		sys.exit(2)

	if len(opts) == 0:
		printHelp(version)
		sys.exit()

	for opt, arg in opts:
		if opt in ("-a", "--analysis"):
			analysisName = arg
		elif opt in ("-p", "--place", "--folder"):
			analysisFolder = arg
		elif opt in ("-v", "--var", "--varianceseed"):
			varianceSeed = float(arg)
			varianceSeedProvided = True
		elif opt in ("-d", "--datafile"):
			dataFile = arg
		elif opt in ("-r", "--relfile", "--relationsfile"):
			relationsFile = arg
		elif opt in ("-n", "--newrelfile"):
			# this used to overwrite removedRelFile, leaving -n without effect
			# on the new relations file
			newRelFile = arg
		elif opt == "--outlierrelfile":
			# declared to getopt but previously never handled
			removedRelFile = arg
		elif opt in ("-L", "--infofile"):
			infoFile = arg
		elif opt in ("-V", "--varfile"):
			varFile = arg
		elif opt in ("-u", "--one-to-one"):
			modeUsed = mode.onlyOne
		elif opt in ("-b", "--no-verbose"):
			verbose = False
		elif opt == "--oldway":
			# plain equality: `opt in ("--oldway")` was a substring test
			oldWay = True
		elif opt in ("-f", "--fdrlimit"):
			FDRLimit = float(arg)
		elif opt in ("-D", "--removeduplicateupper"):
			removeDuplicateUpper = True
		elif opt == "--tags":
			if arg.strip().lower() != "!out":
				tags = "!out&(" + arg + ")"
		elif opt == "--word-operators":
			logicOperatorsAsWords = True
		elif opt == "--outliertag":
			# honour the value given by the user (it used to be ignored)
			# NOTE(review): the filter in `tags` still hard-codes "!out" --
			# confirm whether it should follow a custom outlier tag
			outlierTag = arg
		elif opt in ("-h", "--help"):
			printHelp(version)
			sys.exit()
		elif opt in ("-H", "--advanced-help"):
			printHelp(version, advanced = True)
			sys.exit()

	# REGION: FILE NAMES SETUP

	if not analysisName:
		if dataFile:
			analysisName = os.path.splitext(os.path.basename(dataFile))[0]
		else:
			analysisName = defaultAnalysisName

	# an analysis name that carries a path also provides the default folder
	if os.path.dirname(analysisName):
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if not analysisFolder:
			analysisFolder = analysisNameFirstPart

	if dataFile and not analysisFolder:
		if os.path.dirname(dataFile):
			analysisFolder = os.path.dirname(dataFile)

	# the info file name is settled first, because the variance check below
	# may need to save the log before any other file name is used
	if not infoFile:
		infoFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputInfo + defaultTextExtension)

	# input
	if not dataFile:
		dataFile = os.path.join(analysisFolder, analysisName + "_" + defaultDataFile + defaultTableExtension)

	if not os.path.dirname(dataFile) and analysisFolder:
		dataFile = os.path.join(analysisFolder, dataFile)

	if not os.path.dirname(varFile) and os.path.basename(varFile):
		varFile = os.path.join(analysisFolder, varFile)

	# an explicit -v seed takes precedence over the variance file
	if varFile and not varianceSeedProvided:
		varianceSeed, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = verbose, defaultSeed = varianceSeed)
		if not varianceOk:
			logList.append(["Variance not found in text file."])
			stats.saveFile(infoFile, logList, "INFO FILE")
			sys.exit()

	if not relationsFile:
		relationsFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelationsFile + defaultTableExtension)

	if not os.path.dirname(relationsFile):
		relationsFile = os.path.join(analysisFolder, relationsFile)

	# output
	if not newRelFile:
		if oldWay: # suffix: "cleaned"
			newRelFile = os.path.join(analysisFolder, analysisName + "_" + defaultNewRelFile + defaultTableExtension)
		else: # suffix: "tagged"
			newRelFile = os.path.join(analysisFolder, analysisName + "_" + defaultTaggedRelFile + defaultTableExtension)

	if not removedRelFile:
		removedRelFile = os.path.join(analysisFolder, analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)

	if not os.path.dirname(newRelFile):
		newRelFile = os.path.join(analysisFolder, newRelFile)

	if not os.path.dirname(removedRelFile):
		removedRelFile = os.path.join(analysisFolder, removedRelFile)

	logList.append(["Variance seed = " + str(varianceSeed)])
	logList.append(["Input data file: " + dataFile])
	logList.append(["Input relations file: " + relationsFile])
	if oldWay:
		logList.append(["Output relations file without outliers: " + newRelFile])
		logList.append(["Output relations file with outliers only: " + removedRelFile])
		logList.append(["Removing duplicate higher level elements: " + str(removeDuplicateUpper)])
		logList.append(["OldWay option activated: outliers are removed instead of tagged"])
	else:
		logList.append(["Relations file tagging outliers: " + newRelFile])
		logList.append(["Tags to filter relations: " + tags])
		logList.append(["Tag used for outliers: " + outlierTag])

	# END REGION: FILE NAMES SETUP

	relations = stats.loadRelationsFile(relationsFile)
	data = stats.loadInputDataFile(dataFile)

	if oldWay:
		# only for backward compatibility. Note that tags are not supported
		newRelations, removedRelations, logResults = getRelationsWithoutOutliers(
			data,
			relations,
			varianceSeed,
			FDRLimit = FDRLimit,
			modeUsed = modeUsed,
			removeDuplicateUpper = removeDuplicateUpper)
	else:
		newRelations, removedRelations, logResults = tagRelationsWithoutOutliers(
			data,
			relations,
			varianceSeed,
			FDRLimit = FDRLimit,
			modeUsed = modeUsed,
			removeDuplicateUpper = removeDuplicateUpper,
			tags = tags,
			outlierTag = outlierTag,
			logicOperatorsAsWords = logicOperatorsAsWords)

	# the tagged output carries an extra "tags" column
	if oldWay:
		stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
	else:
		stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")

	stats.saveFile(infoFile, logList, "INFO FILE")

	# the separate outliers file only exists in the old way of working
	if oldWay:
		stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
Exemplo n.º 7
0
def main(argv):
    """Command-line entry point of SanXoTSqueezer.

    Parses the options in `argv`, derives the input and output file names,
    loads the lower and higher level stats files, filters them with
    `filterNFDRorZ`, reorders the result with `stats.sortByIndex(..., 1)` and
    saves it together with a log file.

    Args:
        argv: command-line arguments, without the program name.

    The process exits with status 2 when the options cannot be parsed or
    when any of the load / filter / save stages fails (the failure is
    written to the log file first), and with the default status after
    printing help.
    """

    version = "v0.14"
    analysisName = ""
    defaultAnalysisName = "squeeze"
    analysisFolder = ""
    # parameters
    minimumElements = 2
    maximumElements = 1e6
    maximumFDR = 0.05
    minimumZ = 0.0  # take all by default
    filterByFDR = True  # if false, then it filters by abs(Z)
    # input files
    lowerStats = ""
    higherStats = ""
    defaultLowerStatsFile = "lower"
    defaultHigherStatsFile = "upper"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultOutputFile = "outList"
    defaultLogFile = "logFile"
    # output files
    logFile = ""
    outputFile = ""
    logList = [["SanXoTSqueezer " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    # "upperstats=" is declared so that the long form of -u, which is tested
    # below, is actually accepted by getopt
    longOptions = [
        "analysis=", "lowerstats=", "upperstats=", "logfile=", "outputfile=",
        "place=", "minelements=", "maxelements=", "fdr=", "sigmas=", "help"
    ]

    try:
        opts, args = getopt.getopt(argv, "a:l:L:o:p:u:n:N:f:z:h", longOptions)
    except getopt.GetoptError as parseError:
        # no log file name is known yet, so tell the user on stderr rather
        # than exiting silently
        sys.stderr.write("Error while getting parameters: %s\n" % parseError)
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-l", "--lowerstats"):
            lowerStats = arg
        elif opt in ("-u", "--upperstats"):
            higherStats = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-n", "--minelements"):
            minimumElements = int(arg)
        elif opt in ("-N", "--maxelements"):
            maximumElements = int(arg)
        elif opt in ("-f", "--fdr"):
            maximumFDR = float(arg)
        elif opt in ("-z", "--sigmas"):
            # giving a Z threshold switches the filter from FDR to Z
            filterByFDR = False
            minimumZ = float(arg)
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()

    # REGION: FILE NAMES SETUP

    if not analysisName:
        if lowerStats:
            analysisName = os.path.splitext(os.path.basename(lowerStats))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name that carries a path also provides the default folder
    if os.path.dirname(analysisName):
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if not analysisFolder:
            analysisFolder = analysisNameFirstPart

    if lowerStats and not analysisFolder:
        if os.path.dirname(lowerStats):
            analysisFolder = os.path.dirname(lowerStats)

    filePrefix = analysisName + "_"

    # input
    if not lowerStats:
        lowerStats = os.path.join(
            analysisFolder,
            filePrefix + defaultLowerStatsFile + defaultTableExtension)

    if not higherStats:
        higherStats = os.path.join(
            analysisFolder,
            filePrefix + defaultHigherStatsFile + defaultTableExtension)

    if not os.path.dirname(lowerStats) and analysisFolder:
        lowerStats = os.path.join(analysisFolder, lowerStats)

    if not os.path.dirname(higherStats) and analysisFolder:
        higherStats = os.path.join(analysisFolder, higherStats)

    # output
    if not outputFile:
        outputFile = os.path.join(
            analysisFolder,
            filePrefix + defaultOutputFile + defaultTableExtension)

    if not logFile:
        logFile = os.path.join(
            analysisFolder,
            filePrefix + defaultLogFile + defaultTextExtension)

    if not os.path.dirname(outputFile) and os.path.basename(outputFile):
        outputFile = os.path.join(analysisFolder, outputFile)

    if not os.path.dirname(logFile) and os.path.basename(logFile):
        logFile = os.path.join(analysisFolder, logFile)

    logList.append([""])
    logList.append(["Lower input stats file: " + lowerStats])
    logList.append(["Higher input stats file: " + higherStats])
    logList.append(["Output list: " + outputFile])
    logList.append(["Output log file: " + logFile])
    logList.append(
        ["Minimum elements in higher category: " + str(minimumElements)])
    logList.append(
        ["Maximum elements in higher category: " + str(maximumElements)])
    logList.append(["Minimum z: " + str(minimumZ)])
    logList.append([""])

    # END REGION: FILE NAMES SETUP

    def abort(message, error):
        """Record the failure, flush the log file and stop with status 2."""
        logList.append([message])
        logList.append(["Reason: " + repr(error)])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)

    # The stages below used to catch getopt.GetoptError, which none of these
    # calls raises, so failures were never logged. This is the program's
    # top-level boundary, hence the broad catch: log the reason, then exit.
    try:
        lowerData = stats.loadStatsDataFile(lowerStats)
    except Exception as error:
        abort("Error while getting lower data files.", error)
    logList.append(["Lower data files correctly loaded."])

    try:
        higherData = stats.loadStatsDataFile(higherStats)
    except Exception as error:
        abort("Error while getting higher data files.", error)
    logList.append(["Higher data files correctly loaded."])

    try:
        filteredList = filterNFDRorZ(lowerData,
                                     higherData,
                                     minN=minimumElements,
                                     maxN=maximumElements,
                                     minZ=minimumZ,
                                     maxFDR=maximumFDR,
                                     useFDR=filterByFDR)
        filteredList = stats.sortByIndex(filteredList, 1)
    except Exception as error:
        abort("Error while getting data filtered by N and Z.", error)
    logList.append(["Data correctly filtered."])

    try:
        stats.saveFile(outputFile, filteredList, "id\tn\tZ\tFDR\tX")
    except Exception as error:
        abort("Error while saving output data.", error)
    logList.append(["Output data correctly saved."])

    stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 8
0
def main(argv):
    """Command-line entry point of SanXoTGauss.

    Parses the options in `argv`, derives the input and output file names,
    builds the data with `stats.arrangeSubData`, turns it into two tables
    with `stats.createBigTable`, saves both tables, draws the graph with
    `stats.graphZij` and finally saves the log file.

    Args:
        argv: command-line arguments, without the program name.

    The process exits with status 2 when the options cannot be parsed or
    when any processing stage fails (the failure is written to the log file
    first), and with the default status after printing help.
    """

    version = "v0.23"
    showGraph = True
    graphLimits = 6.0
    showLegend = True
    graphDPI = 100  # default of Matplotlib's savefig method
    graphLineWidth = 1.0
    graphFontSize = 8
    analysisName = ""
    defaultAnalysisName = "sanxot"
    analysisFolder = ""
    # input files
    inStats = ""
    defaultStatsFile = "stats"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    defaultGraphExtension = ".png"
    relationsFile = ""
    upperLevelToGraphFile = ""
    # output files
    defaultUpperLevelToGraphFile = "ulst"
    defaultOutputGraph = "outGraph"
    defaultOutputFile = "outSigmoids"
    defaultExtraTableFile = "outExtra"
    defaultLogFile = "logFile"
    logFile = ""
    graphFile = ""
    outputFile = ""
    extraTableFile = ""
    graphTitle = "Z plot"
    labelFontSize = 12
    minimalGraphTicks = False
    xLabel = "Zij"
    yLabel = "Rank/N"

    logList = [["SanXoTGauss " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    # "place=" is declared so that the "--place" alias tested below is
    # actually accepted by getopt
    longOptions = [
        "analysis=", "folder=", "place=", "statsfile=", "relfile=", "list=",
        "logfile=", "graphfile=", "outputfile=", "graphlimits=",
        "graphfontsize=", "graphdpi=", "graphlinewidth=", "extratable=",
        "graphtitle=", "labelfontsize=", "help", "no-graph", "no-legend",
        "minimalgraphticks", "xlabel=", "ylabel="
    ]

    try:
        opts, args = getopt.getopt(argv, "a:p:z:r:c:L:G:o:l:d:W:s:x:t:Z:hgkT",
                                   longOptions)
    except getopt.GetoptError as parseError:
        # no log file name is known yet, so tell the user on stderr rather
        # than exiting silently
        sys.stderr.write("Error while getting parameters: %s\n" % parseError)
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-z", "--statsfile"):
            inStats = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-c", "--list"):
            upperLevelToGraphFile = arg
        elif opt in ("-L", "--logfile"):
            logFile = arg
        elif opt in ("-G", "--graphfile"):
            graphFile = arg
        elif opt in ("-g", "--no-graph"):
            showGraph = False
        elif opt in ("-k", "--no-legend"):
            showLegend = False
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-x", "--extratable"):
            extraTableFile = arg
        elif opt in ("-s", "--graphfontsize"):
            graphFontSize = int(arg)
        elif opt in ("-d", "--graphdpi"):
            graphDPI = float(arg)
        elif opt in ("-W", "--graphlinewidth"):
            graphLineWidth = float(arg)
        elif opt in ("-t", "--graphtitle"):
            graphTitle = arg
        elif opt in ("-Z", "--labelfontsize"):
            labelFontSize = float(arg)
        elif opt in ("-l", "--graphlimits"):
            graphLimits = float(arg)
        elif opt in ("-T", "--minimalgraphticks"):
            minimalGraphTicks = True
        elif opt == "--xlabel":
            # plain equality: `opt in ("--xlabel")` was a substring test
            # that would also match e.g. "-x"
            xLabel = arg
        elif opt == "--ylabel":
            yLabel = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()

    # REGION: FILE NAMES SETUP

    if not analysisName:
        if inStats:
            analysisName = os.path.splitext(os.path.basename(inStats))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name that carries a path also provides the default folder
    if os.path.dirname(analysisName):
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if not analysisFolder:
            analysisFolder = analysisNameFirstPart

    if inStats and not analysisFolder:
        if os.path.dirname(inStats):
            analysisFolder = os.path.dirname(inStats)

    filePrefix = analysisName + "_"

    # input
    if not inStats:
        inStats = os.path.join(
            analysisFolder,
            filePrefix + defaultStatsFile + defaultTableExtension)

    if not os.path.dirname(inStats) and analysisFolder:
        inStats = os.path.join(analysisFolder, inStats)

    if not relationsFile:
        relationsFile = os.path.join(
            analysisFolder,
            filePrefix + defaultRelationsFile + defaultTableExtension)

    if not upperLevelToGraphFile:
        upperLevelToGraphFile = os.path.join(
            analysisFolder,
            filePrefix + defaultUpperLevelToGraphFile + defaultTextExtension)

    if not os.path.dirname(relationsFile) and os.path.basename(relationsFile):
        relationsFile = os.path.join(analysisFolder, relationsFile)

    if not os.path.dirname(upperLevelToGraphFile) and os.path.basename(
            upperLevelToGraphFile):
        upperLevelToGraphFile = os.path.join(analysisFolder,
                                             upperLevelToGraphFile)

    # output
    if not outputFile:
        outputFile = os.path.join(
            analysisFolder,
            filePrefix + defaultOutputFile + defaultTableExtension)

    if not extraTableFile:
        extraTableFile = os.path.join(
            analysisFolder,
            filePrefix + defaultExtraTableFile + defaultTableExtension)

    if not logFile:
        logFile = os.path.join(
            analysisFolder,
            filePrefix + defaultLogFile + defaultTextExtension)

    if not graphFile:
        graphFile = os.path.join(
            analysisFolder,
            filePrefix + defaultOutputGraph + defaultGraphExtension)

    if not os.path.dirname(outputFile) and os.path.basename(outputFile):
        outputFile = os.path.join(analysisFolder, outputFile)

    if not os.path.dirname(extraTableFile) and os.path.basename(
            extraTableFile):
        extraTableFile = os.path.join(analysisFolder, extraTableFile)

    if not os.path.dirname(logFile) and os.path.basename(logFile):
        logFile = os.path.join(analysisFolder, logFile)

    if not os.path.dirname(graphFile) and os.path.basename(graphFile):
        graphFile = os.path.join(analysisFolder, graphFile)

    logList.append([""])
    logList.append(["Input stats file: " + inStats])
    logList.append(["Relations file: " + relationsFile])
    logList.append(["File with sigmoids to depict: " + upperLevelToGraphFile])
    logList.append(["Output sigmoids table: " + outputFile])
    logList.append(["Output extra table: " + extraTableFile])
    logList.append(["Output log file: " + logFile])
    logList.append(["Output graph file: " + graphFile])
    logList.append([""])

    # END REGION: FILE NAMES SETUP

    def abort(message, error):
        """Record the failure, flush the log file and stop with status 2."""
        logList.append([message])
        logList.append(["Reason: " + repr(error)])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)

    # The stages below used to catch getopt.GetoptError, which none of these
    # calls raises, so failures were never logged. This is the program's
    # top-level boundary, hence the broad catch: log the reason, then exit.
    try:
        data, logListExtraInfo = stats.arrangeSubData(
            inStats=inStats,
            uFile=upperLevelToGraphFile,
            relFile=relationsFile,
            ignoreNaNsInFDR=True)
    except Exception as error:
        abort("Error while getting data files.", error)
    logList.append(logListExtraInfo)
    logList.append(["Data files correctly loaded."])

    try:
        bigTable, bigTableHeader, extraTable, extraHeader = \
            stats.createBigTable(data)
    except Exception as error:
        abort("Error while generating sigmoid table.", error)
    logList.append(["Sigmoid table correctly generated."])

    try:
        stats.saveFile(outputFile, bigTable, bigTableHeader)
    except Exception as error:
        abort("Error while saving sigmoid table.", error)
    logList.append(["Sigmoid table correctly saved."])

    try:
        stats.saveFile(extraTableFile, extraTable, extraHeader)
    except Exception as error:
        abort("Error while saving extra table.", error)
    logList.append(["Extra table correctly saved."])

    try:
        stats.graphZij(data,
                       graphLimits=graphLimits,
                       graphTitle=graphTitle,
                       graphFile=graphFile,
                       showGraph=showGraph,
                       manySigmoids=True,
                       showLegend=showLegend,
                       dpi=graphDPI,
                       graphFontSize=graphFontSize,
                       lineWidth=graphLineWidth,
                       labelFontSize=labelFontSize,
                       minimalGraphTicks=minimalGraphTicks,
                       xLabel=xLabel,
                       yLabel=yLabel)
    except Exception as error:
        abort("Error while saving graph.", error)
    logList.append(["Graph correctly saved."])

    stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 9
0
def main(options, programName, programVersion):
	"""Set to zero the XCorr of every PSM in an MSF file that is not in a text list.

	The PSMs to keep are loaded from a text file with getDataFromTXT (keeping
	rows whose XCorr column is > 0). Every PSM found in the MSF (opened as an
	SQLite database) whose (RAW file name, first scan, charge) is not in that
	list gets its XCorr score value set to 0. Unless -O is given, the MSF is
	first copied to "<name>_zeroed<ext>" and the copy is the file modified.

	options -- parser object providing add_argument() and parse_args().
	programName -- used in the log header and as fallback analysis name.
	programVersion -- version string written in the log header.

	Prints are written in a form valid in both Python 2 and Python 3.
	"""

## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION

	# basic default info
	inputFile = ""
	MSFFile = ""
	logFile = ""
	analysisName = ""
	analysisFolder = ""
	defaultAnalysisName = programName.lower()

	# default extensions and file names
	defaultTextExtension = ".txt"
	defaultLogFile = "logFile"
	leadingFile = "" # no leading file is defined for this program

	# basic log file
	logList = [[programName + " " + programVersion], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	# parsing arguments from commandline
	options.add_argument("-a", "--analysis", type = str, default = "", required = True, help = "Use a prefix for the output files.")
	options.add_argument("-p", "--place", type = str, default = "", required = True, help = "To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
	options.add_argument("-L", "--logfile", type = str, default = "", required = False, help = "To use a non-default name for the log file.")

	# -d[name of the file listing the PSMs to keep]
	# -M[MSF file name]
	# -r[rawfilecol]
	# -s[scannumbercol]
	# -q[chargecol]
	# -S[pepsequencecol]
	# -x[xcorrcol]
	# -R[initialrow]
	options.add_argument("-d", "--inputfile", type = str, default = "", required = True, help = "Name of the text file containing the list of PSMs to keep in the MSF.")
	options.add_argument("-M", "--msffile", type = str, default = "", required = True, help = "Name of the MSF file having the PSMs to modify.")
	options.add_argument("-r", "--rawfilecol", type = str, default = "", required = False, help = "Header of the column contaning the name of the RAW files. Default is \"RAWFileName\".")
	options.add_argument("-s", "--scannumbercol", type = str, default = "", required = False, help = "Header of the column containing the scan numbers. Default is \"FirstScan\".")
	options.add_argument("-q", "--chargecol", type = str, default = "", required = False, help = "Header of the column containing the charge. Default is \"Charge\".")
	options.add_argument("-S", "--pepsequencecol", type = str, default = "", required = False, help = "Header of the column containing the identified peptide sequences. Default is \"Sequence\"")
	options.add_argument("-x", "--xcorrcol", type = str, default = "", required = False, help = "Header of the column containing the XCorr. Default is \"XC1D\".")
	# the default must be empty: a default of "1" would always override the
	# mode-dependent initial row chosen below (24 for QuiXML results)
	options.add_argument("-R", "--initialrow", type = str, default = "", required = False, help = "The position of the row containing the headers. Default is 1 (24 with --quixml).")
	options.add_argument("-v", "--verbose", action='store_true', help = "Show extra info while operating.")
	options.add_argument("-Q", "--quixml", action='store_true', help = "Use column headers for QuiXML results tab separated table file (otherwise, pRatio results file headers will be used by default).")
	options.add_argument("-O", "--changeoriginalmsf", action='store_true', help = "Do not copy the MSF file to be modified, just remove bad PSMs in the original file.")

	arguments = options.parse_args()

	# copying parsed arguments
	verbose = arguments.verbose
	QuiXMLResults = arguments.quixml
	changeOriginalMSFFile = arguments.changeoriginalmsf # False = copy MSF to MSF_zeroed, True = change original file

	# column headers and header row depend on the kind of results file
	if QuiXMLResults:
		RAWFileCol = "RAWFileName"
		scanNumberCol = "FirstScan"
		chargeCol = "Charge"
		pepSequenceCol = "Sequence"
		XCorrCol = "XC1D"
		initialRow = 24
	else: # pRatio results are default
		RAWFileCol = "RAWFile"
		scanNumberCol = "FirstScan"
		chargeCol = "Charge"
		pepSequenceCol = "Sequence"
		XCorrCol = "Xcorr1Search"
		initialRow = 1

	if len(arguments.analysis) > 0: analysisName = arguments.analysis
	if len(arguments.place) > 0: analysisFolder = arguments.place
	if len(arguments.logfile) > 0: logFile = arguments.logfile

	if len(arguments.inputfile) > 0: inputFile = arguments.inputfile
	if len(arguments.msffile) > 0: MSFFile = arguments.msffile
	if len(arguments.rawfilecol) > 0: RAWFileCol = arguments.rawfilecol
	if len(arguments.scannumbercol) > 0: scanNumberCol = arguments.scannumbercol
	if len(arguments.chargecol) > 0: chargeCol = arguments.chargecol
	if len(arguments.pepsequencecol) > 0: pepSequenceCol = arguments.pepsequencecol
	if len(arguments.xcorrcol) > 0: XCorrCol = arguments.xcorrcol
	if len(arguments.initialrow) > 0: initialRow = int(arguments.initialrow)

	# column headers are passed to getDataFromTXT wrapped in square brackets
	RAWFileCol = "[" + RAWFileCol + "]"
	scanNumberCol = "[" + scanNumberCol + "]"
	chargeCol = "[" + chargeCol + "]"
	pepSequenceCol = "[" + pepSequenceCol + "]"
	XCorrCol = "[" + XCorrCol + "]"

## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
## **********************************************************
## REGION: FILE NAMES SETUP

	if len(analysisName) == 0:
		if len(leadingFile) > 0:
			analysisName = os.path.splitext(os.path.basename(leadingFile))[0]
		else:
			analysisName = defaultAnalysisName

	# an analysis name containing a path also provides the folder, if none was given
	if len(os.path.dirname(analysisName)) > 0:
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if len(analysisFolder) == 0:
			analysisFolder = analysisNameFirstPart

	# input

	if len(os.path.dirname(inputFile)) == 0:
		inputFile = os.path.join(analysisFolder, inputFile)

	if len(os.path.dirname(MSFFile)) == 0:
		MSFFile = os.path.join(analysisFolder, MSFFile)

	if os.path.splitext(MSFFile)[1] != ".msf":
		print("")
		print("Warning: your MSF file does not seem to be an MSF file.")

	if changeOriginalMSFFile:
		newMSFFile = MSFFile
	else:
		newMSFFile = os.path.splitext(MSFFile)[0] + "_zeroed" + os.path.splitext(MSFFile)[1]

	# output

	if len(logFile) == 0:
		logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
	if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
		logFile = os.path.join(analysisFolder, logFile)

	logList.append([""])
	logList.append(["Output log file: " + logFile])
	logList.append([""])

## END REGION: FILE NAMES SETUP
## **********************************************************
## REGION: PROGRAM BASIC STRUCTURE

	print("")

	if not changeOriginalMSFFile:
		message = "Creating new file: %s" % newMSFFile
		logList.append([message])
		print(message)
		shutil.copyfile(MSFFile, newMSFFile)

	message = "Loading PSMs to keep from: %s" % inputFile
	logList.append([message])
	print(message)
	myList = getDataFromTXT(inputFile,
				iField = RAWFileCol,
				jField = scanNumberCol,
				kField = chargeCol,
				lField = pepSequenceCol,
				initialRow = initialRow,
				filterString = XCorrCol  + ">0",
				removeDuplicates = True,
				removeCommas = True)

	if len(myList) == 0:
		message = "\nWarning!! No PSMs found in the text file provided,\nDeleting all XCorrs.\n"
	else:
		message = "PSMs found in text file: %i" % len(myList)
	logList.append([message])
	print(message)

	# (RAW file name, scan number, charge) of every PSM to keep; a set makes
	# the membership test in the main loop independent of the list size
	PSMsToKeep = set()
	for element in myList:
		PSMsToKeep.add((os.path.basename(element[0]), int(element[1]), int(element[2])))

	existingScanQuery = """select
	p.peptideid,
	fi.filename,
	sh.firstscan,
	sh.lastscan,
	sh.charge,
	p.sequence,
	ps.scorevalue
from
	peptides p,
	peptideScores ps,
	spectrumHeaders sh,
	massPeaks mp,
	fileInfos fi,
	processingNodeScores scoreNames
where
	p.peptideid = ps.peptideid and
	sh.spectrumid = p.spectrumid and
	(fi.fileid = mp.fileid or mp.fileid = -1) and
	mp.masspeakid = sh.masspeakid and
	scoreNames.scoreid = ps.scoreid and
	scoreNames.ScoreName = 'Xcorr'
order by fi.filename desc,
	sh.firstscan asc,
	sh.lastscan asc,
	sh.charge asc,
	ps.scorevalue desc;"""

	# parameters: first scan, charge, LIKE pattern matching the end of the file name
	zeroSettingQuery = """update
	peptideScores
set
	scoreValue = 0
where
	peptideID in (
		select
			p.peptideID
		from
			fileinfos fi,
			massPeaks mp,
			spectrumHeaders sh,
			peptides p
		where
			(fi.fileid = mp.fileid or mp.fileid = -1) and
			mp.masspeakid = sh.masspeakid and
			sh.spectrumid = p.spectrumid and
			sh.firstScan = ? and
			sh.charge = ? and
			fi.fileName like ?
	) and
	scoreID = (
		select
			scoreId
		from
			processingNodeScores
		where
			scoreName = "XCorr"
			);"""

	connexion = sqlite3.connect(newMSFFile)
	try:
		c = connexion.cursor()

		message = "Searching all PSMs in database..."
		logList.append([message])
		print(message)
		# fetch everything first: the same cursor is reused for the updates
		wholeMSFList = list(c.execute(existingScanQuery))

		checked = 0
		changed = 0
		alreadyZero = 0

		message = "Zeroing...!"
		logList.append([message])
		message = "Original MSF contains %i PSMs."  % len(wholeMSFList)
		logList.append([message])
		logList.append([""])
		logList.append(["Zeroed\tRAWFileName\tScanNumber\tCharge\tSequence (without PTMs)\tOriginal XCorr"])

		print(message)
		for psm in wholeMSFList:
			checked += 1
			rawFile = os.path.basename(str(psm[1]))
			firstScan = int(psm[2])
			charge = int(psm[4])
			sequence = str(psm[5])
			XCorr = float(psm[6])

			if XCorr == 0:
				alreadyZero += 1
				continue

			if (rawFile, firstScan, charge) in PSMsToKeep:
				continue

			# this PSM is not in the given list: make its XCorr = 0
			changed += 1
			c.execute(zeroSettingQuery, (firstScan, charge, "%" + rawFile))
			message = """%i\t%s\t%i\t%i\t%s\t%f""" % (checked, rawFile, firstScan, charge, sequence, XCorr)
			logList.append([message])

			if verbose:
				message = """Zeroed (%i/%i):\nraw = "%s"\nscan = %i\ncharge = %i\nsequence = %s\nXCorr = %f\n""" % (checked, len(wholeMSFList), rawFile, firstScan, charge, sequence, XCorr)
				print(message)

		print("")
		message = "PTMs found: %i,\nPTMs zeroed: %i,\nDifference: %i,\nAlready zero: %i" % (checked, changed, checked - changed, alreadyZero)
		print(message)
		logList.append([""])
		logList.append([message])
		print("")
		print("Saving changes...")
		connexion.commit()
		print("")
		print("Closing connexion...")
	finally:
		# always release the database, even if a query fails
		connexion.close()

## END REGION: PROGRAM BASIC STRUCTURE
## **********************************************************
## REGION: SAVING FILES

	logList.append(["Most probably, everything went fine."])
	stats.saveFile(logFile, logList, "LOG FILE")

	print("Done!")
Exemplo n.º 10
0
def main(argv):
	"""Select the experiment with the median variance and hand its files to copyFilesWithPrefix.

	Info files named "<prefix>*_infoFile.txt" are searched in the analysis
	folder; the variance of each one is read with
	stats.extractVarianceFromVarFile. The files sharing the prefix of the
	info file holding the median variance (and, optionally, the files with
	the equivalent extra prefix) are passed to copyFilesWithPrefix together
	with the median tag. A log is saved at the end.

	argv -- list of command line arguments (without the program name).
	"""

	version = "v0.05"
	analysisName = ""
	analysisFolder = ""
	logFile = ""

	# in data
	prefix = ""
	extraPrefix = ""
	medianTag = "med"

	# default filenames
	defaultInfoFileSuffix = "_infoFile.txt"
	defaultLogFile = "logFile"
	defaultAnalysisName = "medianSelection"

	# default extensions
	defaultTextExtension = ".txt"

	logList = [["Anselmo " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	try:
		# "place=" and "advanced-help" are declared because the loop below handles them
		opts, args = getopt.getopt(argv, "a:p:f:g:m:L:hH", ["analysis=", "folder=", "place=", "prefix=", "extraprefix=", "mediantag=", "logfile=", "help", "advanced-help", "egg", "easteregg"])
	except getopt.GetoptError:
		logList.append(["Error while getting parameters."])
		# no file name is known yet, so the log goes to the default name in the current folder
		stats.saveFile(defaultAnalysisName + "_" + defaultLogFile + defaultTextExtension, logList, "INFO FILE")
		sys.exit(2)

	if len(opts) == 0:
		printHelp(version, True)
		sys.exit()

	for opt, arg in opts:
		if opt in ("-a", "--analysis"):
			analysisName = arg
		elif opt in ("-p", "--place", "--folder"):
			analysisFolder = arg
		elif opt in ("-f", "--prefix"):
			prefix = arg
		elif opt in ("-g", "--extraprefix"):
			extraPrefix = arg
		elif opt in ("-L", "--logfile"):
			logFile = arg
		elif opt in ("-m", "--mediantag"):
			medianTag = arg
		elif opt in ("-h", "--help"):
			printHelp(version)
			sys.exit()
		elif opt in ("-H", "--advanced-help"):
			printHelp(version, advanced = True)
			sys.exit()
		elif opt in ("--egg", "--easteregg"):
			easterEgg()
			sys.exit()

# REGION: FILE NAMES SETUP

	if len(analysisName) == 0:
		analysisName = defaultAnalysisName

	# an analysis name containing a path also provides the folder, if none was given
	if len(os.path.dirname(analysisName)) > 0:
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if len(analysisFolder) == 0:
			analysisFolder = analysisNameFirstPart

	# next "if" disables extra copy when extraPrefix is same as prefix
	if len(extraPrefix) > 0 and extraPrefix == prefix:
		extraPrefix = ""

	# output

	if len(logFile) == 0:
		logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)

# END REGION: FILE NAMES SETUP

	# get infoFile list
	infoFileList = glob.glob(os.path.join(analysisFolder, prefix + "*" + defaultInfoFileSuffix))

	logList.append([])
	logList.append(["Folder = " + analysisFolder])
	logList.append([])
	logList.append(["Info files with prefix \"%s\"" % prefix])

	# keep only the info files from which a variance could be extracted
	varList = []
	for varFile in infoFileList:
		variance, varianceOk = stats.extractVarianceFromVarFile(varFile, verbose = False)
		if varianceOk:
			varList.append([varFile, variance])

	if len(varList) == 0:
		# nothing to choose from: indexing varList below would fail
		logList.append(["Error: no info file with a valid variance was found."])
		stats.saveFile(logFile, logList, "INFO FILE")
		sys.exit(2)

	# get info file with median variance

	varList = stats.sortByIndex(varList, 1)
	medianVariance = stats.medianByIndex(varList, 1)

	medianIndex = getMedianIndex(varList = varList, variance = medianVariance)
	medianInfoFilePath = varList[medianIndex][0]

	for element in varList:
		if element[0] == medianInfoFilePath:
			logList.append(["%s, variance = %f [taken]" % (os.path.basename(element[0]), element[1])])
		else:
			logList.append(["%s, variance = %f" % (os.path.basename(element[0]), element[1])])

	# get prefix of median experiment: the text between the prefix and the
	# info file suffix identifies the experiment

	medianInfoFile = os.path.basename(medianInfoFilePath)
	randTag = medianInfoFile[len(prefix):len(medianInfoFile) - len(defaultInfoFileSuffix)]
	medianPrefix = prefix + randTag
	extraMedianPrefix = ""
	if len(extraPrefix) > 0: extraMedianPrefix = extraPrefix + randTag

	# get file list with specific prefix
	medianExperimentFileList = glob.glob(os.path.join(analysisFolder, medianPrefix + "*.*"))
	extraPrefixFileList = []
	if len(extraMedianPrefix) > 0:
		extraPrefixFileList = glob.glob(os.path.join(analysisFolder, extraMedianPrefix + "*.*"))

	# copy files including median tag
	extraLogList = copyFilesWithPrefix(fileList = medianExperimentFileList,
				folder = analysisFolder,
				prefix = prefix,
				message = "Renamed files:",
				tag = medianTag)
	logList.extend(extraLogList)

	if len(extraPrefixFileList) > 0:
		extraLogList = copyFilesWithPrefix(fileList = extraPrefixFileList,
					folder = analysisFolder,
					prefix = extraPrefix,
					message = "Renamed extra files:",
					tag = medianTag)
		logList.extend(extraLogList)

	# save logFile

	stats.saveFile(logFile, logList, "INFO FILE")
Exemplo n.º 11
0
def main(options, programName, programVersion):
	"""Build a relations file from a table of accession numbers and categories.

	The table is loaded with stats.load2stringList and turned into relations
	by getRelations; the sorted result is appended to the relations of an
	optional previous file and saved, followed by a log file.

	options -- parser object providing add_argument() and parse_args().
	programName -- used in the log header and as fallback analysis name.
	programVersion -- version string written in the log header.
	"""

## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION

	# log header
	logList = [[programName + " " + programVersion], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	# pieces used to build the default output names
	tableExt = ".tsv"
	textExt = ".txt"
	logSuffix = "logFile"
	relSuffix = "rels"

	# command line definition
	options.add_argument("-a", "--analysis",
		type=str, default="", required=True,
		help="Use a prefix for the output files.")
	options.add_argument("-p", "--place",
		type=str, default="", required=True,
		help="To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
	options.add_argument("-L", "--logfile",
		type=str, default="", required=False,
		help="To use a non-default name for the log file.")
	options.add_argument("-d", "--dbfile",
		type=str, default="", required=True,
		help="The input file containing accession numbers and categories.")
	options.add_argument("-x", "--previousfile",
		type=str, default="", required=False,
		help="An optional relation file to which concatenate resulting relations (if omitted, a new file will be produced).")
	options.add_argument("-q", "--accnumcol",
		type=str, default="1", required=False,
		help="Column where accession numbers of genes/proteins are. First column is 1. Default is 1.")
	options.add_argument("-c", "--categorycol",
		type=str, default="2", required=False,
		help="Column where categories are. First column is 1. Default is 2.")
	options.add_argument("-f", "--prefix",
		type=str, default="", required=False,
		help="Prefix to add to all categories found in this parsing (such as \"GO-full_\", \"Panther_\", or \"KEGG=2017-01-10_\".")
	options.add_argument("--fasta",
		type=str, default="", required=False,
		help="FASTA file contaning the identifiers we want to replace by FASTA headers in the final file. Note that identifiers not appearing in this FASTA file will be removed from the final list.")

	arguments = options.parse_args()

	# every string option defaults to "", so the parsed values can be taken as they are
	analysisName = arguments.analysis
	analysisFolder = arguments.place
	logFile = arguments.logfile
	DBFile = arguments.dbfile
	FASTAFile = arguments.fasta
	previousFile = arguments.previousfile
	catPrefix = arguments.prefix

	# column numbers fall back to 1 and 2 when an empty string is given
	accNumCol = 1
	if arguments.accnumcol:
		accNumCol = int(arguments.accnumcol)
	catCol = 2
	if arguments.categorycol:
		catCol = int(arguments.categorycol)

## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
## **********************************************************
## REGION: FILE NAMES SETUP

	if not analysisName:
		if DBFile:
			analysisName = os.path.splitext(os.path.basename(DBFile))[0]
		else:
			analysisName = programName.lower()

	# a path in the analysis name provides the folder when none was given
	namePath, nameOnly = os.path.split(analysisName)
	if namePath:
		analysisName = nameOnly
		if not analysisFolder:
			analysisFolder = namePath

	def placeInFolder(path):
		# bare file names are located in the analysis folder
		if os.path.dirname(path) == "":
			return os.path.join(analysisFolder, path)
		return path

	# input
	DBFile = placeInFolder(DBFile)
	if previousFile:
		previousFile = placeInFolder(previousFile)
	if FASTAFile:
		FASTAFile = placeInFolder(FASTAFile)

	# output
	if not logFile:
		logFile = os.path.join(analysisFolder, analysisName + "_" + logSuffix + textExt)
	logFile = placeInFolder(logFile)

	# there is no option for the relations file: the default name is always used
	relFile = placeInFolder(os.path.join(analysisFolder, analysisName + "_" + relSuffix + tableExt))

	summary = [""]
	summary.append("Input table with categories and proteins: " + DBFile)
	if previousFile:
		summary.append("Previous file to which new qc relations are added: " + previousFile)
	if FASTAFile:
		summary.append("FASTA file to replace identifiers for FASTA headers: " + FASTAFile)
	summary.append("Category column: %i, protein column: %i" % (catCol, accNumCol))
	summary.append("Prefix added to categories: " + catPrefix)
	summary.append("Output relations file: " + relFile)
	summary.append("Output log file: " + logFile)
	summary.append("")
	for line in summary:
		logList.append([line])

## END REGION: FILE NAMES SETUP
## **********************************************************
## REGION: PROGRAM BASIC STRUCTURE

	# when appending to a previous file, no header is written
	if previousFile:
		previousList = stats.load2stringList(previousFile, removeCommas = True)
		header = ""
	else:
		previousList = []
		header = "idsup\tidinf"

	AccNum2FASTAHeader = getFASTAHeaders(FASTAFile)
	gc.collect()

	DBList = stats.load2stringList(DBFile, removeCommas = True)
	newRelations = getRelations(
		bigTable = DBList,
		qCol = accNumCol,
		cCol = catCol,
		cPrefix = catPrefix,
		FASTAHeaders = AccNum2FASTAHeader)
	relationList = previousList + stats.sortByIndex(newRelations, 0)

	gc.collect()

## END REGION: PROGRAM BASIC STRUCTURE
## **********************************************************
## REGION: SAVING FILES

	try:
		stats.saveFile(relFile, relationList, header)
		logList.append(["Everything went fine."])
		stats.saveFile(logFile, logList, "LOG FILE")
	except getopt.GetoptError:
		logList.append(["Error."])
		stats.saveFile(logFile, logList, "LOG FILE")
		sys.exit(2)
Exemplo n.º 12
0
def main(argv):
    """Tag (or, in old-way mode, remove) outlier relations of an integration.

    Loads a relations file and a data file with the stats module and passes
    them, together with the variance seed and the FDR limit, to
    tagRelationsWithoutOutliers (default) or getRelationsWithoutOutliers
    (--oldway). The resulting relations, an info file and, in old-way mode,
    the removed relations are saved with stats.saveFile.

    argv -- list of command line arguments (without the program name).
    """

    version = "v0.17"
    analysisName = ""
    analysisFolder = ""
    varianceSeed = 0.001
    FDRLimit = 0.01
    varianceSeedProvided = False
    removeDuplicateUpper = False
    tags = "!out"
    outlierTag = "out"
    logicOperatorsAsWords = False
    dataFile = ""
    relationsFile = ""
    newRelFile = ""
    removedRelFile = ""
    defaultDataFile = "data"
    defaultRelationsFile = "rels"
    defaultTaggedRelFile = "tagged"
    defaultNewRelFile = "cleaned"
    defaultRemovedRelFile = "outliers"
    defaultOutputInfo = "infoFile"
    infoFile = ""
    varFile = ""
    defaultTableExtension = ".tsv"
    defaultTextExtension = ".txt"
    verbose = True
    oldWay = False  # instead of tagging outliers, separating relations files, the old way
    modeUsed = mode.onePerHigher
    logList = [["SanXoTSieve " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    try:
        # "place=" and "var=" are declared because the loop below handles them.
        # "relationsfile=" is deliberately not added: it would make the
        # currently valid abbreviation "--rel" ambiguous.
        opts, args = getopt.getopt(argv, "a:p:v:d:r:n:L:V:f:ubDhH", [
            "analysis=", "folder=", "place=", "varianceseed=", "var=",
            "datafile=", "relfile=", "newrelfile=", "outlierrelfile=",
            "infofile=", "varfile=", "fdrlimit=", "one-to-one", "no-verbose",
            "randomise", "removeduplicateupper", "help", "advanced-help",
            "tags=", "outliertag=", "oldway", "word-operators"
        ])
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        # no file name is known yet (infoFile is still empty), so the log
        # goes to the default info file name in the current folder
        stats.saveFile(defaultOutputInfo + defaultTextExtension, logList,
                       "INFO FILE")
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-v", "--var", "--varianceseed"):
            varianceSeed = float(arg)
            varianceSeedProvided = True
        elif opt in ("-d", "--datafile"):
            dataFile = arg
        elif opt in ("-r", "--relfile", "--relationsfile"):
            relationsFile = arg
        elif opt in ("-n", "--newrelfile"):
            # name of the relations file produced (tagged or cleaned)
            newRelFile = arg
        elif opt in ("--outlierrelfile",):
            # name of the file with the removed relations (old-way mode only)
            removedRelFile = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-u", "--one-to-one"):
            modeUsed = mode.onlyOne
        elif opt in ("-b", "--no-verbose"):
            verbose = False
        elif opt in ("--oldway",):
            oldWay = True
        elif opt in ("-f", "--fdrlimit"):
            FDRLimit = float(arg)
        elif opt in ("-D", "--removeduplicateupper"):
            removeDuplicateUpper = True
        elif opt in ("--tags",):
            # user tags are combined with the default filter
            if arg.strip().lower() != "!out":
                tags = "!out&(" + arg + ")"
        elif opt in ("--word-operators",):
            logicOperatorsAsWords = True
        elif opt in ("--outliertag",):
            outlierTag = arg
        elif opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("-H", "--advanced-help"):
            printHelp(version, advanced=True)
            sys.exit()

# REGION: FILE NAMES SETUP

    if len(analysisName) == 0:
        if len(dataFile) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFile))[0]
        else:
            # defaultAnalysisName is not defined in this function: use the
            # module-level value if the file provides one, a fixed name otherwise
            analysisName = globals().get("defaultAnalysisName", "sanxotsieve")

    # an analysis name containing a path also provides the folder, if none was given
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(dataFile) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFile)) > 0:
            analysisFolder = os.path.dirname(dataFile)

    # the info file name is set before anything that may need to save the log
    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTextExtension)

    # input
    if len(dataFile) == 0:
        dataFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFile + defaultTableExtension)

    if len(os.path.dirname(dataFile)) == 0 and len(analysisFolder) > 0:
        dataFile = os.path.join(analysisFolder, dataFile)

    if len(os.path.dirname(varFile)) == 0 and len(
            os.path.basename(varFile)) > 0:
        varFile = os.path.join(analysisFolder, varFile)

    # a variance seed given in the command line takes precedence over the file
    if len(varFile) > 0 and not varianceSeedProvided:
        varianceSeed, varianceOk = stats.extractVarianceFromVarFile(
            varFile, verbose=verbose, defaultSeed=varianceSeed)
        if not varianceOk:
            logList.append(["Variance not found in text file."])
            stats.saveFile(infoFile, logList, "INFO FILE")
            sys.exit()

    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)

    if len(os.path.dirname(relationsFile)) == 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    # output
    if len(newRelFile) == 0:
        if oldWay:  # suffix: "cleaned"
            newRelFile = os.path.join(
                analysisFolder,
                analysisName + "_" + defaultNewRelFile + defaultTableExtension)
        else:  # suffix: "tagged"
            newRelFile = os.path.join(
                analysisFolder, analysisName + "_" + defaultTaggedRelFile +
                defaultTableExtension)

    if len(removedRelFile) == 0:
        removedRelFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRemovedRelFile + defaultTableExtension)

    if len(os.path.dirname(newRelFile)) == 0:
        newRelFile = os.path.join(analysisFolder, newRelFile)

    if len(os.path.dirname(removedRelFile)) == 0:
        removedRelFile = os.path.join(analysisFolder, removedRelFile)

    logList.append(["Variance seed = " + str(varianceSeed)])
    logList.append(["Input data file: " + dataFile])
    logList.append(["Input relations file: " + relationsFile])
    if oldWay:
        logList.append(
            ["Output relations file without outliers: " + newRelFile])
        logList.append(
            ["Output relations file with outliers only: " + removedRelFile])
        logList.append([
            "Removing duplicate higher level elements: " +
            str(removeDuplicateUpper)
        ])
        logList.append([
            "OldWay option activated: outliers are removed instead of tagged"
        ])
    else:
        logList.append(["Relations file tagging outliers: " + newRelFile])
        logList.append(["Tags to filter relations: " + tags])
        logList.append(["Tag used for outliers: " + outlierTag])

# END REGION: FILE NAMES SETUP

    relations = stats.loadRelationsFile(relationsFile)
    data = stats.loadInputDataFile(dataFile)

    if oldWay:
        # only for backward compatibility. Note that tags are not supported
        newRelations, removedRelations, logResults = \
              getRelationsWithoutOutliers(data,
                relations,
                varianceSeed,
                FDRLimit = FDRLimit,
                modeUsed = modeUsed,
                removeDuplicateUpper = removeDuplicateUpper)
    else:
        newRelations, removedRelations, logResults = \
              tagRelationsWithoutOutliers(data,
                relations,
                varianceSeed,
                FDRLimit = FDRLimit,
                modeUsed = modeUsed,
                removeDuplicateUpper = removeDuplicateUpper,
                tags = tags,
                outlierTag = outlierTag,
                logicOperatorsAsWords = logicOperatorsAsWords)

    # the tagged relations file has an extra column with the tags
    if oldWay:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf")
    else:
        stats.saveFile(newRelFile, newRelations, "idsup\tidinf\ttags")

    stats.saveFile(infoFile, logList, "INFO FILE")

    # removed relations are only saved apart in old-way mode
    if oldWay:
        stats.saveFile(removedRelFile, removedRelations, "idsup\tidinf")
Exemplo n.º 13
0
def main(argv):
    """Produce an inverted FASTA file from the FASTA file given in the command line.

    Parses the options, works out the input, output and log file names, and
    calls readAndWriteNewFASTAFile with the cleave sites and the palindrome
    removal flag. A log file is saved at the end.

    argv -- list of command line arguments (without the program name).
    """

    version = "v0.04"
    logList = [["Inversor " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    # values that can be changed from the command line
    analysisName = ""
    analysisFolder = ""
    fastaFile = ""
    cleaveSites = "KR"  # trypsin default
    removePalindromes = False

    shortOptions = "a:p:f:c:rh"
    longOptions = [
        "analysis=", "folder=", "fastafile=", "cleavesites=", "place=",
        "removepalindromes", "help", "egg", "easteregg"
    ]
    try:
        opts, args = getopt.getopt(argv, shortOptions, longOptions)
    except getopt.GetoptError:
        logList.append(["Error while getting parameters."])
        sys.exit(2)

    if not opts:
        printHelp(version)
        sys.exit()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            printHelp(version)
            sys.exit()
        elif opt in ("--egg", "--easteregg"):
            easterEgg()
            sys.exit()
        elif opt in ("-r", "--removepalindromes"):
            removePalindromes = True
        elif opt in ("-c", "--cleavesites"):
            cleaveSites = arg.strip()
        elif opt in ("-f", "--fastafile"):
            fastaFile = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-a", "--analysis"):
            analysisName = arg

# REGION: FILE NAMES SETUP

    # without an analysis name, the FASTA file name (or a fixed name) is used
    if not analysisName:
        if fastaFile:
            analysisName = os.path.splitext(os.path.basename(fastaFile))[0]
        else:
            analysisName = "inversor"

    # a path in the analysis name provides the folder when none was given
    namePath, nameOnly = os.path.split(analysisName)
    if namePath:
        analysisName = nameOnly
        if not analysisFolder:
            analysisFolder = namePath

    # last resort for the folder: the one containing the FASTA file
    if fastaFile and not analysisFolder:
        analysisFolder = os.path.dirname(fastaFile)

    # input
    if fastaFile and not os.path.dirname(fastaFile):
        fastaFile = os.path.join(analysisFolder, fastaFile)

    # output (there are no options for these names: defaults are always used)
    invertedFastaFile = os.path.join(analysisFolder,
                                     analysisName + "_" + "inv" + ".fasta")
    logFile = os.path.join(analysisFolder,
                           analysisName + "_" + "logFile" + ".txt")

    for line in ("",
                 "Input FASTA file: " + fastaFile,
                 "Inverted FASTA file: " + invertedFastaFile,
                 "Cleave sites used: " + cleaveSites,
                 ""):
        logList.append([line])

    # END REGION: FILE NAMES SETUP

    try:
        readAndWriteNewFASTAFile(fastaFile,
                                 invertedFastaFile,
                                 digestionPoints=cleaveSites,
                                 removePalindromes=removePalindromes)
        logList.append(["Everything went fine."])
    except getopt.GetoptError:
        logList.append(["Error."])
        stats.saveFile(logFile, logList, "LOG FILE")
        sys.exit(2)

    stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 14
0
def main(options, programName, programVersion):
	"""Compute the degree of coordination and write it to the log file.

	Registers the command-line arguments on *options*, parses them, resolves
	the input and log file paths, counts relations and outliers through
	getListChangingCats() and getRels(), and reports
	coord = (C - B) / (C + A) on screen and in the log file.

	Args:
		options: parser object offering add_argument() and parse_args()
			(presumably an argparse.ArgumentParser -- confirm with the caller).
		programName: program name; it heads the log and, lowercased, is the
			fallback analysis name.
		programVersion: version string appended to the program name in the log.

	Exits with status 2, after saving the log, when C + A is zero and the
	degree of coordination is therefore undefined.
	"""

## REGION: DEFAULT VALUES AND VARIABLE ACQUISITION

	# basic default info
	logFile = ""
	analysisName = ""
	analysisFolder = ""
	defaultAnalysisName = programName.lower()
	caFDR = 0.05
	modeSanXoTSieve = "newWay" # alternatively, "oldWay"
	coordination = 0
	caseSensitive = True
	
	# default extensions
	defaultTextExtension = ".txt"
	
	# default file names
	defaultLogFile = "logFile"
	qcInputFile = "qcInput"
	qcInputFileNoOuts = ""
	caInputFile = "caInput"
	
	# basic log file
	logList = [[programName + " " + programVersion], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	# parsing arguments from commandline
	options.add_argument("-a", "--analysis", type = str, default = "", required = False, help = "Use a prefix for the output files.")
	options.add_argument("-p", "--place", type = str, default = "", required = False, help = "To use a different common folder for the output files. If this is not provided, the the folder used will be the same as the FASTA file folder.")
	options.add_argument("-L", "--logfile", type = str, default = "", required = False, help = "To use a non-default name for the log file.")
	options.add_argument("-q", "--qcinput", type = str, default = "", required = True, help = "Input outstats file with the q2c integration, including outliers. It must include outliers tagged in new SanXoTSieve files, or, alternatively, it must add the corresponding file with no outliers (see -n argument) for SanXoTSieve in oldWay mode.")
	options.add_argument("-n", "--qcinputnoouts", type = str, default = "", required = False, help = "Input outstats file with the q2c integration, NOT including outliers (using this parameter automatically implies the oldWay option has been used in SanXoTSieve, i.e. removing outliers from the relations file, instead of just tagging them).")
	options.add_argument("-c", "--cainput", type = str, default = "", required = True, help = "Input outstats file with the c2a integration, defining in the FDR column which cateogries are considered to be changing.")
	options.add_argument("--cafdr", type = str, default = "0.05", required = False, help = "To consider a non-default FDR value for changing categories in the c2a integration (default is 0.05).")
	options.add_argument("--caseinsensitive", action='store_true', help = "Consider case insensitive categories and protein identifiers (by default, they are case sensitive).")
		
	arguments = options.parse_args()
	
	# copying parsed arguments
	# copy any arguments used
	if len(arguments.analysis) > 0: analysisName = arguments.analysis
	if len(arguments.place) > 0: analysisFolder = arguments.place
	if len(arguments.logfile) > 0: logFile = arguments.logfile
	if len(arguments.qcinput) > 0: qcInputFile = arguments.qcinput
	if len(arguments.qcinputnoouts) > 0:
		# providing a file without outliers implies the old SanXoTSieve mode
		qcInputFileNoOuts = arguments.qcinputnoouts
		modeSanXoTSieve = "oldWay"
	if len(arguments.cainput) > 0: caInputFile = arguments.cainput
	if len(str(arguments.cafdr)) > 0:
		try:
			caFDR = float(arguments.cafdr)
		except (TypeError, ValueError):
			# only conversion failures are tolerated; the default FDR is kept
			message = "Warning: FDR for categories changing could not be parsed, %f will be used instead." % caFDR
			logList.append([""])
			logList.append([message])
			logList.append([""])
			print("")
			print(message)
			print("")
	caseSensitive = not arguments.caseinsensitive
	
## END REGION: DEFAULT VALUES AND VARIABLE ACQUISITION
## **********************************************************
## REGION: FILE NAMES SETUP

	if len(analysisName) == 0:
		if len(qcInputFile) > 0:
			analysisName = os.path.splitext(os.path.basename(qcInputFile))[0]
		else:
			analysisName = defaultAnalysisName

	# an analysis name given with a path also provides the default folder
	if len(os.path.dirname(analysisName)) > 0:
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if len(analysisFolder) == 0:
			analysisFolder = analysisNameFirstPart
	
	if len(analysisFolder) == 0:
		analysisFolder = os.getcwd()
	
	# input

	if len(os.path.dirname(qcInputFile)) == 0:
		qcInputFile = os.path.join(analysisFolder, qcInputFile)
	
	if len(os.path.dirname(qcInputFileNoOuts)) == 0 and modeSanXoTSieve == "oldWay":
		qcInputFileNoOuts = os.path.join(analysisFolder, qcInputFileNoOuts)
		
	if len(os.path.dirname(caInputFile)) == 0:
		caInputFile = os.path.join(analysisFolder, caInputFile)
	
	# output
	
	# the only output is the logFile, which includes a last line with the coordination
	if len(logFile) == 0:
		logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
	if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
		logFile = os.path.join(analysisFolder, logFile)

	logList.append([""])
	logList.append(["Input protein-to-category outstats file: " + qcInputFile])
	logList.append(["Input category-to-all outstats file: " + caInputFile])
	if len(qcInputFileNoOuts) > 0: logList.append(["Input protein-to-category outstats with NO outliers: " + qcInputFileNoOuts])
	logList.append(["Output log file: " + logFile])
	logList.append(["category-to-all FDR used: %f" % caFDR])
	logList.append(["SanXoTSieve mode: " + modeSanXoTSieve])
	logList.append([""])

## END REGION: FILE NAMES SETUP			
## **********************************************************
## REGION: PROGRAM BASIC STRUCTURE

	listChangingCats = getListChangingCats(caInputFile, caFDR)
	numRelsChangingCats, numOutliersChangingCats, numOutliersNonChangingCats = getRels(qcInputFile, listChangingCats, qcInputFileNoOuts, modeSanXoTSieve, caseSensitive)
	
	logList.append(["Total number of changing categories: %i" % len(listChangingCats)])
	logList.append(["Total number of relations pointing to changing categories: %i" % numRelsChangingCats])
	logList.append(["Total number of outlier relations pointing to changing categories: %i" % numOutliersChangingCats])
	logList.append(["Total number of outlier relations pointing to non-changing categories: %i" % numOutliersNonChangingCats])
	
	# explanation
	#	coord = (C - B)/(C + A)
	#	where
	#		C = numRelsChangingCats = qc-relations pointing to categories changing in ca
	#		B = numOutliersChangingCats = outlier qc-relations in categories changing in ca
	#		A = numOutliersNonChangingCats = outlier qc-relations in categories not changing in ca
	#		hence, B + A = outlier qc-relations in any category
	
	denominator = float(numRelsChangingCats) + float(numOutliersNonChangingCats)
	if denominator == 0:
		# C + A == 0 would raise ZeroDivisionError before any log is written,
		# so the problem is reported and the counts gathered so far are saved
		message = "Error: the degree of coordination cannot be calculated, as there are neither relations pointing to changing categories nor outliers in non-changing categories."
		logList.append([message])
		print("")
		print(message)
		print("")
		stats.saveFile(logFile, logList, "LOG FILE")
		sys.exit(2)
	
	coordination = (float(numRelsChangingCats) - float(numOutliersChangingCats)) / denominator

## END REGION: PROGRAM BASIC STRUCTURE
## **********************************************************
## REGION: SAVING FILES
	
	message = "Degree of coordination: %f" % coordination
	logList.append([message])
	print("")
	print("Find more details in the log file, at: %s" % logFile)
	print("")
	print(message)
	print("")
	
	stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 15
0
def main(argv):
	"""Command-line entry point of Sanson: build a similarity graph.

	Parses *argv* with getopt, resolves every input and output file path from
	the analysis name and folder, loads the data with associateElements(),
	computes the similarity matrix with SMatrix(), chooses (or applies) the
	similarity limit with getBestFNumber(), saves the similarity matrix and
	the cluster table, and draws the graph with createDOTGraph(). The log is
	saved to the log file at the end.

	Args:
		argv: list of command-line arguments, without the program name.

	Exits with status 2 when the arguments cannot be parsed, and with the
	default status after printing the help or when no option is given.
	"""

	version = "v1.14"
	verbose = False
	similarityLimit = -1.0 # if remain as -1, it will be calculated
	graphLimits = 6.0
	analysisName = ""
	useSubStats = True
	defaultAnalysisName = "sanxot"
	analysisFolder = ""
	# input files
	inStats = ""
	defaultStatsFile = "stats"
	defaultRelationsFile = "rels"
	defaultTableExtension = ".tsv"
	defaultTextExtension = ".txt"
	defaultDOTExtension = ".gv"
	relationsFile = ""
	upperLevelToGraphFile = ""
	# output files
	defaultUpperLevelToGraphFile = "ulst"
	defaultOutputGraph = "simGraph"
	defaultLogFile = "logFile"
	defaultSimilarityMatrixFile = "outSimilarities"
	defaultOutputGVFileName = "simGraph"
	defaultOutputClusterFileName = "outClusters"
	logFile = ""
	graphFile = ""
	dotFile = ""
	outCluster = ""
	similarityMatrixFile = ""
	graphFileFormat = "png"
	supportedGraphFormats = ("png", "svg", "jpg", "tif", "tiff", "pdf", "bmp", "gif")
	altMax = 5
	
	selectedNodeColour = "#ff9090"
	defaultNodeColour = "#ffff80"
	errorNodeColour = "#8080ff"
	minColour = "#00ff00"
	middleColour = "#ffffff"
	maxColour = "#ff0000"
	defaultNodeTextColour = "#000000"
	nonParetoOpacity = 0.5
	
	minFontSize = 10.0
	maxFontSize = 70.0
	graphDPI = 96.0
	graphRatio = 0.0
	
	logList = [["Sanson " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

	try:
		opts, args = getopt.getopt(argv, "a:p:z:r:c:L:G:m:l:d:e:s:d:g:N:bhk", ["analysis=", "folder=", "place=", "statsfile=", "relfile=", "relationsfile=", "list=", "logfile=", "graphfile=", "simfile=", "graphlimits=", "similarity=", "dotfile=", "outcluster=", "graphformat=", "altmax=", "selectednodecolour=", "selectednodecolor=", "defaultnodecolour=", "defaultnodecolor=", "defaultnodetextcolour=", "defaultnodetextcolor=", "errornodecolour=", "errornodecolor=", "mincolour=", "mincolor=", "middlecolour=", "middlecolor=", "maxcolour=", "maxcolor=","nonparetoopacity=", "minfontsize=", "maxfontsize=", "graphdpi=", "graphratio=", "nosubstats", "help", "egg", "easteregg"])
	except getopt.GetoptError as error:
		# no log file name is known yet, so the problem is shown on screen
		logList.append(["Error while getting parameters."])
		print("Error while getting parameters: %s" % error)
		sys.exit(2)
	
	if len(opts) == 0:
		printHelp(version)
		sys.exit()
		
	for opt, arg in opts:
		if opt in ("-a", "--analysis"):
			analysisName = arg
		elif opt in ("-p", "--place", "--folder"):
			analysisFolder = arg
		elif opt in ("-z", "--statsfile"):
			inStats = arg
		elif opt in ("-r", "--relfile", "--relationsfile"):
			relationsFile = arg
		elif opt in ("-c", "--list"):
			upperLevelToGraphFile = arg
		elif opt in ("-L", "--logfile"):
			logFile = arg
		elif opt in ("-G", "--graphfile"):
			graphFile = arg
		elif opt in ("-m", "--simfile"):
			similarityMatrixFile = arg
		elif opt in ("-l", "--graphlimits"):
			graphLimits = float(arg)
		elif opt in ("-e", "--similarity"):
			similarityLimit = float(arg)
		elif opt in ("-d", "--dotfile"):
			dotFile = arg
		elif opt in ("-s", "--outcluster"):
			# this is a file name: converting it to float made the later
			# len(outCluster) fail with a TypeError
			outCluster = arg
		elif opt in ("-b", "--nosubstats"):
			useSubStats = False
		# one-element tuples below: a bare string would be a substring test
		elif opt in ("--nonparetoopacity",):
			nonParetoOpacity = float(arg)
		elif opt in ("-N", "--altmax"):
			altMax = int(arg)
		elif opt in ("-g", "--graphformat"):
			graphFileFormat = arg.lower().strip()
			if graphFileFormat == "jpeg": graphFileFormat = "jpg"
			if graphFileFormat not in supportedGraphFormats:
				print()
				print("Warning: graph format \"%s\" is not supported,\npng will be used instead." % graphFileFormat)
				print()
				graphFileFormat = "png"
		
		elif opt in ("--selectednodecolour", "--selectednodecolor"):
			selectedNodeColour = arg
		elif opt in ("--defaultnodecolour", "--defaultnodecolor"):
			defaultNodeColour = arg
		elif opt in ("--defaultnodetextcolour", "--defaultnodetextcolor"):
			defaultNodeTextColour = arg
		elif opt in ("--errornodecolour", "--errornodecolor"):
			errorNodeColour = arg
		elif opt in ("--mincolour", "--mincolor"):
			minColour = arg
		elif opt in ("--middlecolour", "--middlecolor"):
			middleColour = arg
		elif opt in ("--maxcolour", "--maxcolor"):
			maxColour = arg
		elif opt in ("--minfontsize",):
			minFontSize = float(arg)
		elif opt in ("--maxfontsize",):
			maxFontSize = float(arg)
		elif opt in ("--graphdpi",):
			graphDPI = float(arg)
		elif opt in ("--graphratio",):
			graphRatio = float(arg)
		
		elif opt in ("-h", "--help"):
			printHelp(version)
			sys.exit()
		elif opt in ("--egg", "--easteregg"):
			easterEgg()
			sys.exit()

# REGION: FILE NAMES SETUP

	if minFontSize > maxFontSize: minFontSize = maxFontSize
	defaultGraphExtension = "." + graphFileFormat
	
	if len(analysisName) == 0:
		if len(inStats) > 0:
			analysisName = os.path.splitext(os.path.basename(inStats))[0]
		else:
			analysisName = defaultAnalysisName

	# an analysis name given with a path also provides the default folder
	if len(os.path.dirname(analysisName)) > 0:
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if len(analysisFolder) == 0:
			analysisFolder = analysisNameFirstPart
			
	if len(inStats) > 0 and len(analysisFolder) == 0:
		if len(os.path.dirname(inStats)) > 0:
			analysisFolder = os.path.dirname(inStats)

	# input
	if len(inStats) == 0:
		inStats = os.path.join(analysisFolder, analysisName + "_" + defaultStatsFile + defaultTableExtension)
		
	if len(os.path.dirname(inStats)) == 0 and len(analysisFolder) > 0:
		inStats = os.path.join(analysisFolder, inStats)

	if len(relationsFile) == 0:
		relationsFile = os.path.join(analysisFolder, analysisName + "_" + defaultRelationsFile + defaultTableExtension)
		
	if len(upperLevelToGraphFile) == 0:
		upperLevelToGraphFile = os.path.join(analysisFolder, analysisName + "_" + defaultUpperLevelToGraphFile + defaultTextExtension)
	
	if len(os.path.dirname(relationsFile)) == 0 and len(os.path.basename(relationsFile)) > 0:
		relationsFile = os.path.join(analysisFolder, relationsFile)

	if len(os.path.dirname(upperLevelToGraphFile)) == 0 and len(os.path.basename(upperLevelToGraphFile)) > 0:
		upperLevelToGraphFile = os.path.join(analysisFolder, upperLevelToGraphFile)
		
	# output
	if len(dotFile) == 0:
		dotFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGVFileName + defaultDOTExtension)
	
	if len(outCluster) == 0:
		outCluster = os.path.join(analysisFolder, analysisName + "_" + defaultOutputClusterFileName + defaultTableExtension)
	
	if len(similarityMatrixFile) == 0:
		similarityMatrixFile = os.path.join(analysisFolder, analysisName + "_" + defaultSimilarityMatrixFile + defaultTextExtension)
		
	if len(logFile) == 0:
		logFile = os.path.join(analysisFolder, analysisName + "_" + defaultLogFile + defaultTextExtension)
		
	if len(graphFile) == 0:
		graphFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputGraph + defaultGraphExtension)

	if len(os.path.dirname(dotFile)) == 0 and len(os.path.basename(dotFile)) > 0:
		dotFile = os.path.join(analysisFolder, dotFile)

	if len(os.path.dirname(outCluster)) == 0 and len(os.path.basename(outCluster)) > 0:
		outCluster = os.path.join(analysisFolder, outCluster)
		
	if len(os.path.dirname(similarityMatrixFile)) == 0 and len(os.path.basename(similarityMatrixFile)) > 0:
		similarityMatrixFile = os.path.join(analysisFolder, similarityMatrixFile)
		
	if len(os.path.dirname(logFile)) == 0 and len(os.path.basename(logFile)) > 0:
		logFile = os.path.join(analysisFolder, logFile)
		
	if len(os.path.dirname(graphFile)) == 0 and len(os.path.basename(graphFile)) > 0:
		graphFile = os.path.join(analysisFolder, graphFile)
		
	logList.append([""])
	logList.append(["Input stats file: " + inStats])
	logList.append(["Relations file: " + relationsFile])
	logList.append(["File with sigmoids to depict: " + upperLevelToGraphFile])
	logList.append(["Output similarity matrix table: " + similarityMatrixFile])
	logList.append(["Output log file: " + logFile])
	logList.append(["Output graph file: " + graphFile])
	logList.append([""])
	if useSubStats: logList.append(["Filling nodes with Z from lower elemenets"])
	else: logList.append(["Filling nodes with Z from upper elements"])
	logList.append([""])

# END REGION: FILE NAMES SETUP			
	
	try:
		data, extraData, logListExtraInfo = associateElements(inStats = inStats, uFile = upperLevelToGraphFile, relFile = relationsFile)
		logList.append(logListExtraInfo)
		logList.append(["Data files correctly loaded."])
	except getopt.GetoptError:
		# NOTE(review): associateElements() is not shown here; confirm whether
		# it can really raise GetoptError or another exception was intended
		logList.append(["Error while getting data files."])
		stats.saveFile(logFile, logList, "LOG FILE")
		sys.exit(2)
	
	if len(data) == 0:
		logList.append([""])
		errorMessage = "No data were retrieved to create the similarity graph."
		print(errorMessage)
		print("Exiting...")
		logList.append([errorMessage])
	else:
		
		similarityMatrix, NMatrix = SMatrix(data)
		
		stats.saveFile(similarityMatrixFile, similarityMatrix)
		
		if useSubStats:
			# TODO: add this step's output to the log
			subData = stats.arrangeSubData(inStats = inStats,
							uFile = upperLevelToGraphFile,
							relFile = relationsFile,
							ignoreNaNsInFDR = True)
		else:
			subData = None
		
		if similarityLimit < 0.0 or similarityLimit > 1.0:
			# means it has to be calculated
			# this includes the default value = -1
			FNumber, bestBooleanSimMatrix, bestClusterVector, CNumber = \
				getBestFNumber(similarityMatrix,
					verbose = False,
					stepFNumber = 0.01,
					initialFNumber = 0.0,
					finalFNumber = 1.0)
			
			logList.append([""])
			logList.append(["Creating DOT graph for the best FNumber = %f." % FNumber])
			logList.append(["At least %i graphs contain %i nodes." % (CNumber, CNumber)])
		else:
			# a valid limit was given: evaluate only that single value
			FNumber, bestBooleanSimMatrix, bestClusterVector, CNumber = \
				getBestFNumber(similarityMatrix,
					verbose = False,
					stepFNumber = 0.0,
					initialFNumber = similarityLimit,
					finalFNumber = similarityLimit)
					
			logList.append([""])
			logList.append(["Creating DOT graph for the given FNumber = %f." % FNumber])
		
		paretoInfo, extraDataWithClusters = getParetoInfo(clusterVector = bestClusterVector,
						extraData = extraData)
						
		stats.saveFile(outCluster, extraDataWithClusters, "id\tn\tZ\tFDR\tX\tcluster id\tPareto front?")
		
		createDOTGraph(similarityMatrix,
						simLimit = FNumber,
						outputGVFile = dotFile,
						simGraphFile = graphFile,
						extraData = extraData,
						subData = subData,
						NMatrix = NMatrix,
						graphLimits = graphLimits,
						graphFileFormat = graphFileFormat,
						altMax = altMax,
						defaultNodeColour = defaultNodeColour,
						errorNodeColour = errorNodeColour,
						minColour = minColour,
						middleColour = middleColour,
						maxColour = maxColour,
						defaultNodeTextColour = defaultNodeTextColour,
						nonParetoOpacity = nonParetoOpacity,
						paretoInfo = paretoInfo,
						minFontSize = minFontSize,
						maxFontSize = maxFontSize,
						graphDPI = graphDPI,
						graphRatio = graphRatio)
	
	stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 16
0
def main(argv):
    """Command-line entry point of XVpCollector.

    Parses *argv* with getopt, resolves the data, relations, output and info
    file paths from the analysis name and folder, runs X_VpCollector() and
    saves its result to the output file and the log to the info file.

    Args:
        argv: list of command-line arguments, without the program name.

    Exits with status 2 when the arguments cannot be parsed, and with the
    default status after printing the help or when no option is given.
    """
    version = "v0.01"
    analysisName = ""
    analysisFolder = ""
    relationsFile = ""
    dataFiles = ""
    outputFile = ""
    defaultOutput = "OutStats"
    defaultOutputInfo = "infoFile"
    defaultRelationsFile = "rels"
    defaultDataFiles = "datafile"
    defaultTableExtension = ".xls"
    defaultAnalysisName = "xpvpAnalysis"
    infoFile = ""
    logList = [["XVpCollector " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
    try:
        # "place=" is listed because the loop below handles --place
        opts, args = getopt.getopt(argv, "a:p:r:o:d:h", [
            "analysis=", "folder=", "place=", "relfile=", "outputfile=",
            "dataFile=", "help"
        ])
    except getopt.GetoptError as error:
        # infoFile is still empty here, so there is no file name to save the
        # log to; the problem is shown on screen instead
        print("Error while getting parameters: %s" % error)
        sys.exit(2)
    if len(opts) == 0:
        printHelp(version)
        sys.exit()
    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-r", "--relfile"):
            relationsFile = arg
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-d", "--dataFile"):
            dataFiles = arg
        elif opt in ("-h", "--help"):
            # stop after the help instead of running the analysis with
            # default file names
            printHelp(version)
            sys.exit()

# REGION: FILE NAMES SETUP
    if len(analysisName) == 0:
        if len(dataFiles) > 0:
            analysisName = os.path.splitext(os.path.basename(dataFiles))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given with a path also provides the default folder
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    if len(dataFiles) > 0 and len(analysisFolder) == 0:
        if len(os.path.dirname(dataFiles)) > 0:
            analysisFolder = os.path.dirname(dataFiles)

# input
    if len(dataFiles) == 0:
        dataFiles = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultDataFiles + defaultTableExtension)

    if len(os.path.dirname(dataFiles)) == 0 and len(analysisFolder) > 0:
        dataFiles = os.path.join(analysisFolder, dataFiles)

    if len(relationsFile) == 0:
        relationsFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultRelationsFile + defaultTableExtension)

    if len(os.path.dirname(relationsFile)) == 0 and len(analysisFolder) > 0:
        relationsFile = os.path.join(analysisFolder, relationsFile)

    # output
    if len(outputFile) == 0:
        outputFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutput + defaultTableExtension)
    else:
        if len(os.path.dirname(outputFile)) == 0:
            outputFile = os.path.join(analysisFolder, outputFile)

    if len(infoFile) == 0:
        infoFile = os.path.join(
            analysisFolder,
            analysisName + "_" + defaultOutputInfo + defaultTableExtension)

    logList.append(["Input dataFiles " + str(dataFiles)])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Input dataFile: " + dataFiles])
    logList.append(["Output stats file: " + outputFile])
    logList.append(["Output info file: " + infoFile])

    outputList = X_VpCollector(dataFiles=dataFiles,
                               relationsFile=relationsFile)
    # TODO: fix the header for Xp and Vp (it is kept as it was, including
    # the commas next to the tabs)
    header = "idsup\tX'sup,\t,V'sup"
    stats.saveFile(outputFile, outputList, header)

    if len(infoFile) > 0:
        stats.saveFile(infoFile, logList, "INFO FILE")
Exemplo n.º 17
0
def main(argv):
	"""Command-line entry point of Aljamia: extract columns into a table.

	Parses *argv* with getopt, resolves the input, output and log file paths
	from the analysis name and folder, reads the requested fields with
	getDataFromXML() when the input extension is ".xml" and with
	getDataFromTXT() otherwise, and saves the result to the output file with
	a header made of the requested field expressions.

	Args:
		argv: list of command-line arguments, without the program name.

	Exits with status 2 when the arguments cannot be parsed or the XML file
	cannot be read, and with the default status after printing the help or
	when no option is given.
	"""

	version = "v1.19"
	fileName = ""
	outFile = ""
	iField = ""
	jField = ""
	kField = ""
	lField = ""
	c5Field = ""
	analysisName = ""
	filterString = ""
	inputSeparator = "\t"
	useNumbers = False
	logicOperatorsAsWords = False # False = Python-style operators (&&, ||), True = word-like operators (\and\ \or\)
	curlyBrackets = False # False = normal brackets (), True = curly brackets {}
	analysisFolder = ""
	defaultFileName = "QuiXML"
	defaultOutputFile = "table"
	defaultOutputLog = "log"
	defaultTableExtension = ".tsv"
	defaultTextExtension = ".txt"
	defaultXMLExtension = ".xml"
	defaultAnalysisName = "aljamia"
	removeDuplicates = True
	removeEmptyRows = False
	allowOperationsInFields = ""
	tableId = "peptide_match" # default for QuiXML
	initialRow = 1 # for xls coming from QuiXML should be 25
	logFile = ""
	logList = [["Aljamia " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
	
	try:
		# "s:" and "inputseparator=" take a value: the loop below reads the
		# separator from the option argument
		opts, args = getopt.getopt(argv, "a:p:x:o:i:j:k:l:t:R:L:f:F:A:e:s:cdwh", ["input=", "filename=", "place=", "folder=", "outfile=", "c1=", "c2=", "c3=", "c4=", "c5=", "table=", "initialrow=", "logfile=", "filter=", "allow-operations=", "curly-brackets", "allow-duplicates", "remove-empty=", "word-operators", "inputseparator=", "help", "egg", "easteregg"])
	except getopt.GetoptError as error:
		# logFile is still empty here, so there is no file name to save the
		# log to; the problem is shown on screen instead
		print("Error while getting parameters: %s" % error)
		sys.exit(2)
	
	if len(opts) == 0:
		printHelp(version)
		sys.exit()
		
	for opt, arg in opts:
		if opt in ("-a", "--analysis"):
			analysisName = arg
		elif opt in ("-p", "--place", "--folder"):
			analysisFolder = arg
		elif opt in ("-x", "--input", "--filename"):
			fileName = arg
		elif opt in ("-o", "--output", "--outfile"):
			# "--outfile" is the long name getopt accepts
			outFile = arg
		elif opt in ("-i", "--c1"):
			iField = arg.strip()
		elif opt in ("-j", "--c2"):
			jField = arg.strip()
		elif opt in ("-k", "--c3"):
			kField = arg.strip()
		elif opt in ("-l", "--c4"):
			lField = arg.strip()
		elif opt in ("--c5",):
			c5Field = arg.strip()
		elif opt in ("-t", "--table"):
			tableId = int(arg) # *** check this: int or string?
		elif opt in ("-R", "--initialrow"):
			initialRow = int(arg)
		elif opt in ("-L", "--logfile"):
			logFile = arg
		elif opt in ("-f", "--filter"):
			filterString = arg
			useNumbers = False
		elif opt in ("-F", "--filter-using-numbers"):
			filterString = arg
			useNumbers = True
		elif opt in ("-A", "--allow-operations"):
			allowOperationsInFields = str(arg).strip()
		elif opt in ("-d", "--allow-duplicates"):
			removeDuplicates = False
		elif opt in ("-e", "--remove-empty"):
			# NOTE(review): the raw option string is forwarded as it is;
			# confirm what getDataFromTXT() expects here
			removeEmptyRows = arg
		elif opt in ("-c", "--curly-brackets"):
			curlyBrackets = True
		elif opt in ("-w", "--word-operators"):
			logicOperatorsAsWords = True
		elif opt in ("-s", "--inputseparator"):
			# only the first character is used; an empty value keeps the default
			if len(arg) > 0:
				inputSeparator = str(arg)[0]
		elif opt in ("-h", "--help"):
			printHelp(version)
			sys.exit()
		elif opt in ("--egg", "--easteregg"):
			easterEgg()
			sys.exit()

			
# REGION: FILE NAMES SETUP
	
	if len(analysisName) == 0:
		if len(fileName) > 0:
			analysisName = os.path.splitext(os.path.basename(fileName))[0]
		else:
			analysisName = defaultAnalysisName
	
	# an analysis name given with a path also provides the default folder
	if len(os.path.dirname(analysisName)) > 0:
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if len(analysisFolder) == 0:
			analysisFolder = analysisNameFirstPart
			
	if len(fileName) > 0 and len(analysisFolder) == 0:
		if len(os.path.dirname(fileName)) > 0:
			analysisFolder = os.path.dirname(fileName)
	# input
	if len(fileName) == 0:
		fileName = os.path.join(analysisFolder, analysisName + "_" + defaultFileName + defaultXMLExtension)
		
	if len(os.path.dirname(fileName)) == 0:
		fileName = os.path.join(analysisFolder, fileName)
	
	# output
	if len(outFile) == 0:
		outFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputFile + defaultTableExtension)
	else:
		if len(os.path.dirname(outFile)) == 0:
			outFile = os.path.join(analysisFolder, outFile)
	
	if len(logFile) == 0:
		logFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputLog + defaultTextExtension)
	
	logList.append(["Input file: " + fileName])
	logList.append(["Output file: " + outFile])
	logList.append(["Log file: " + logFile])
	
# END REGION: FILE NAMES SETUP

	if os.path.splitext(fileName)[1] == ".xml":

		try:
			xmlDocument = ElementTree.parse(fileName).getroot()
		except Exception:
			# any failure while reading or parsing is reported the same way,
			# but SystemExit and KeyboardInterrupt are no longer swallowed
			print("Error while reading xml file2.")
			logList.append(["Error while reading xml file."])
			stats.saveFile(logFile, logList, "LOG FILE")
			sys.exit(2)
		
		# the first field is iField ("Field" was an undefined name here)
		resultingData = getDataFromXML(xmlDocument,
										iField,
										jField,
										kField,
										lField,
										c5Field,
										tableId = tableId,
										filterString = filterString,
										removeDuplicates = removeDuplicates,
										useNumbers = useNumbers,
										logicOperatorsAsWords = logicOperatorsAsWords,
										curlyBrackets = curlyBrackets)
		
	else: # then it should be a tsv
		resultingData = getDataFromTXT(fileName,
										iField,
										jField,
										kField,
										lField,
										c5Field,
										filterString = filterString,
										removeDuplicates = removeDuplicates,
										removeEmptyRows = removeEmptyRows,
										initialRow = initialRow,
										allowOperationsInFields = allowOperationsInFields,
										useNumbers = useNumbers,
										logicOperatorsAsWords = logicOperatorsAsWords,
										curlyBrackets = curlyBrackets,
										inputSeparator = inputSeparator)
	
	# the header repeats the requested field expressions, tab-separated
	iTab = ""
	if len(iField) > 0: iTab = "%s\t" % iField
	jTab = ""
	if len(jField) > 0: jTab = "%s\t" % jField
	kTab = ""
	if len(kField) > 0: kTab = "%s\t" % kField
	lTab = ""
	if len(lField) > 0: lTab = "%s\t" % lField
	c5Tab = ""
	if len(c5Field) > 0: c5Tab = "%s\t" % c5Field
	
	header = iTab + jTab + kTab + lTab + c5Tab
	# drop the trailing tab
	header = header[:len(header) - 1]
	
	stats.saveFile(outFile, resultingData, header)
Exemplo n.º 18
0
def showGraphTool(inputRawData,
					inputRelations,
					k,
					variance,
					alpha,
					medianSide,
					verbose = False,
					showRank = False,
					graphFile = None,
					graphData = None,
					dpi = None,
					showGraph = True):
	"""Plot the values from getMADDistribution() and getInverseOfFit().

	Both input lists are sorted in place, merged through
	getNextIdX_klibrate(), and the resulting distribution, fit and weights
	are trimmed by medianSide at the start before plotting, either against
	their rank (showRank) or against the weights.

	Args:
		inputRawData: list of data rows; sorted in place.
		inputRelations: list of relation rows; sorted in place.
		k, variance, alpha: values forwarded to the helper functions and
			shown in the graph title.
		medianSide: half width of the median window; the full window is
			medianSide * 2 + 1 elements.
		verbose: not used in this function.
		showRank: if True, plot against the rank instead of the weights.
		graphFile: if given, file the figure is saved to.
		graphData: if given (and showRank is True), file the plotted values
			are saved to.
		dpi: resolution passed to plt.savefig().
		showGraph: if True, show the figure with plt.show().

	Exits the program when there are fewer data rows than the median window.
	"""
	
	plt.clf()
	inputRawData.sort()
	inputRelations.sort()
	
	windowWidth = medianSide * 2 + 1
	if len(inputRawData) < windowWidth:
		print('Error: window for median is bigger than total input size')
		sys.exit()
	
	nextIdXData = getNextIdX_klibrate(inputRawData, inputRelations, k, variance, alpha, giveMergedData = True)
	
	nextIdX = nextIdXData[0]
	mergedData = nextIdXData[1]

	MADdistrOut, weights = getMADDistribution(nextIdX, mergedData, k, variance, alpha, medianSide)
	invOfFitOut = getInverseOfFit(mergedData, k, variance, alpha)

	# the three lists are trimmed with the same bounds
	MADdistrOut = MADdistrOut[medianSide:len(MADdistrOut) - medianSide + 1]
	invOfFitOut = invOfFitOut[medianSide:len(invOfFitOut) - medianSide + 1]
	weights = weights[medianSide:len(weights) - medianSide + 1]
	
	if showRank:
		plt.plot(list(range(len(MADdistrOut))), MADdistrOut, 'g.', list(range(len(invOfFitOut))), invOfFitOut, 'r', linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
		plt.xlabel('rank($V_s$)')
		plt.ylabel('1 / MSD')
		
		if graphData:
			# the rows are only needed when they are going to be saved
			dataToSave = [[i, weights[i], MADdistrOut[i], invOfFitOut[i]]
							for i in range(len(MADdistrOut))]
			stats.saveFile(graphData, dataToSave, "rank(Vs)\tweight\tMAD\t1/fit")
	else:
		plt.plot(weights, MADdistrOut, 'g.', weights, invOfFitOut, 'r', linewidth=1.0, markersize=2.0, markeredgewidth=0.0)
		plt.xlabel('($V_s$)')
		plt.ylabel('1 / MSD')
	
	plt.grid(True)
	# raw string: "\s" is not a valid escape sequence in a normal literal
	plt.title(r'k = %g, $\sigma^2$ = %g, alpha = %g' % (k, variance, alpha))

	if graphFile:
		plt.savefig(graphFile, dpi = dpi)
	
	if showGraph:
		plt.show()
Exemplo n.º 19
0
def main(argv):
	"""Run Aljamia from a list of command-line arguments.

	The arguments are parsed with getopt; the input, output and log file
	paths are then completed from the analysis name and folder. The requested
	columns are read with getDataFromXML when the input file extension is
	".xml" and with getDataFromTXT otherwise, and the resulting table and
	the log are written with stats.saveFile.

	argv -- list of command-line arguments, without the program name.

	Exits with status 2 when the arguments cannot be parsed or when the XML
	input cannot be read. Calls printHelp and exits when no option is given
	or when -h/--help is present.
	"""

	version = "v1.17"
	fileName = ""
	outFile = ""
	iField = ""
	jField = ""
	kField = ""
	lField = ""
	c5Field = ""
	analysisName = ""
	filterString = ""
	inputSeparator = "\t"
	useNumbers = False
	logicOperatorsAsWords = False # False = Python-style operators (&&, ||), True = word-like operators (\and\ \or\)
	curlyBrackets = False # False = normal brackets (), True = curly brackets {}
	analysisFolder = ""
	defaultFileName = "QuiXML"
	defaultOutputFile = "table"
	defaultOutputLog = "log"
	defaultTableExtension = ".tsv"
	defaultTextExtension = ".txt"
	defaultXMLExtension = ".xml"
	defaultAnalysisName = "aljamia"
	removeDuplicates = True
	allowOperationsInFields = ""
	tableId = "peptide_match" # default for QuiXML
	initialRow = 1 # for xls coming from QuiXML should be 25
	logFile = ""
	logList = [["Aljamia " + version], ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]
	
	# every option handled in the loop below must be declared here, otherwise
	# getopt rejects it (or, for a missing "=", hands over an empty argument)
	shortOptions = "a:p:x:o:i:j:k:l:t:R:L:f:F:A:s:cdwh"
	longOptions = ["analysis=", "input=", "filename=", "place=", "folder=",
					"output=", "outfile=", "c1=", "c2=", "c3=", "c4=", "c5=",
					"table=", "initialrow=", "logfile=", "filter=",
					"filter-using-numbers=", "allow-operations=",
					"curly-brackets", "allow-duplicates", "word-operators",
					"inputseparator=", "help", "egg", "easteregg"]
	
	try:
		opts, args = getopt.getopt(argv, shortOptions, longOptions)
	except getopt.GetoptError as parsingError:
		# logFile is only assigned while looping over the parsed options, so
		# it is still empty here: report on the console instead of trying to
		# save a log under an empty file name
		logList.append(["Error while getting parameters."])
		print("Error while getting parameters.")
		print(str(parsingError))
		sys.exit(2)
	
	if len(opts) == 0:
		printHelp(version)
		sys.exit()
		
	for opt, arg in opts:
		if opt in ("-a", "--analysis"):
			analysisName = arg
		elif opt in ("-p", "--place", "--folder"):
			analysisFolder = arg
		elif opt in ("-x", "--input", "--filename"):
			fileName = arg
		elif opt in ("-o", "--output", "--outfile"):
			outFile = arg
		elif opt in ("-i", "--c1"):
			iField = arg.strip()
		elif opt in ("-j", "--c2"):
			jField = arg.strip()
		elif opt in ("-k", "--c3"):
			kField = arg.strip()
		elif opt in ("-l", "--c4"):
			lField = arg.strip()
		elif opt == "--c5":
			# plain equality on purpose: `opt in ("--c5")` is a substring
			# test against the string "--c5", which also matches "-c"
			c5Field = arg.strip()
		elif opt in ("-t", "--table"):
			tableId = int(arg) # *** check this: int or string?
		elif opt in ("-R", "--initialrow"):
			initialRow = int(arg)
		elif opt in ("-L", "--logfile"):
			logFile = arg
		elif opt in ("-f", "--filter"):
			filterString = arg
			useNumbers = False
		elif opt in ("-F", "--filter-using-numbers"):
			filterString = arg
			useNumbers = True
		elif opt in ("-A", "--allow-operations"):
			allowOperationsInFields = str(arg).strip()
		elif opt in ("-d", "--allow-duplicates"):
			removeDuplicates = False
		elif opt in ("-c", "--curly-brackets"):
			curlyBrackets = True
		elif opt in ("-w", "--word-operators"):
			logicOperatorsAsWords = True
		elif opt in ("-s", "--inputseparator"):
			# only the first character is used; an empty argument keeps the default
			if len(arg) > 0:
				inputSeparator = str(arg)[0]
		elif opt in ("-h", "--help"):
			printHelp(version)
			sys.exit()
		elif opt in ("--egg", "--easteregg"):
			easterEgg()
			sys.exit()

			
# REGION: FILE NAMES SETUP
	
	# the analysis name falls back to the input file name, then to the default
	if len(analysisName) == 0:
		if len(fileName) > 0:
			analysisName = os.path.splitext(os.path.basename(fileName))[0]
		else:
			analysisName = defaultAnalysisName
	
	# an analysis name given with a path also provides the folder, unless
	# a folder was given explicitly
	if len(os.path.dirname(analysisName)) > 0:
		analysisNameFirstPart = os.path.dirname(analysisName)
		analysisName = os.path.basename(analysisName)
		if len(analysisFolder) == 0:
			analysisFolder = analysisNameFirstPart
			
	# last resort for the folder: the folder of the input file
	if len(fileName) > 0 and len(analysisFolder) == 0:
		if len(os.path.dirname(fileName)) > 0:
			analysisFolder = os.path.dirname(fileName)
	
	# input
	if len(fileName) == 0:
		fileName = os.path.join(analysisFolder, analysisName + "_" + defaultFileName + defaultXMLExtension)
		
	if len(os.path.dirname(fileName)) == 0:
		fileName = os.path.join(analysisFolder, fileName)
	
	# output
	if len(outFile) == 0:
		outFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputFile + defaultTableExtension)
	elif len(os.path.dirname(outFile)) == 0:
		outFile = os.path.join(analysisFolder, outFile)
	
	if len(logFile) == 0:
		logFile = os.path.join(analysisFolder, analysisName + "_" + defaultOutputLog + defaultTextExtension)
	
	logList.append(["Input file: " + fileName])
	logList.append(["Output file: " + outFile])
	logList.append(["Log file: " + logFile])
	
# END REGION: FILE NAMES SETUP

	if os.path.splitext(fileName)[1] == ".xml":

		try:
			xmlDocument = ElementTree.parse(fileName).getroot()
		except Exception:
			# Exception rather than a bare except, so that SystemExit and
			# KeyboardInterrupt are not reported as an unreadable file
			print("Error while reading xml file.")
			logList.append(["Error while reading xml file."])
			stats.saveFile(logFile, logList, "LOG FILE")
			sys.exit(2)
		
		resultingData = getDataFromXML(xmlDocument,
										iField,
										jField,
										kField,
										lField,
										c5Field,
										tableId = tableId,
										filterString = filterString,
										removeDuplicates = removeDuplicates,
										useNumbers = useNumbers,
										logicOperatorsAsWords = logicOperatorsAsWords,
										curlyBrackets = curlyBrackets)
		
	else: # then it should be a tsv
		resultingData = getDataFromTXT(fileName,
										iField,
										jField,
										kField,
										lField,
										c5Field,
										filterString = filterString,
										removeDuplicates = removeDuplicates,
										initialRow = initialRow,
										allowOperationsInFields = allowOperationsInFields,
										useNumbers = useNumbers,
										logicOperatorsAsWords = logicOperatorsAsWords,
										curlyBrackets = curlyBrackets,
										inputSeparator = inputSeparator)
	
	# the header holds only the fields that were requested, tab separated
	requestedFields = [field for field in (iField, jField, kField, lField, c5Field) if len(field) > 0]
	header = "\t".join(requestedFields)
	
	stats.saveFile(outFile, resultingData, header)
	
	if len(logFile) > 0:
		stats.saveFile(logFile, logList, "LOG FILE")
Exemplo n.º 20
0
def main(argv):
    """Run SanXoTGhost from a list of command-line arguments.

    The arguments are parsed with getopt; every input and output path is
    then completed from the analysis name and folder. ZpCalculator is called
    with the resulting paths, and its output and the info log are written
    with stats.saveFile.

    argv -- list of command-line arguments, without the program name.

    Exits with status 2 when the arguments cannot be parsed. Calls printHelp
    and exits when no option is given or when -h/--help is present.
    """
    version = "v0.08"
    analysisName = ""
    analysisFolder = ""
    relationsFile = ""
    modifiedPeptidesFile = ""
    modifiedPeptidesFile1 = ""
    nonModifiedPep2ProtFile = ""
    pep2protein = ""
    varFile = ""
    varFile1 = ""
    outputFile = ""
    defaultModifiedPeptidesFile = "modPepFile"
    defaultModifiedPeptidesFile1 = "modPepFile1"
    defaultnonModifiedPeptidesFile = "nonmodPepFile"
    defaultpep2protein = "Pep2proteinFile"
    defaultOutput = "ModOutStats"
    defaultOutputInfo = "infoFile"
    defaultRelationsFile = "rels"
    defaultTableExtension = ".xls"
    defaultAnalysisName = "ghostanalysis"
    infoFile = ""
    logList = [["SanXoTGhost " + version],
               ["Start: " + strftime("%Y-%m-%d %H:%M:%S")]]

    # "place=" and "varfile1=" are declared because the loop below handles
    # them; the historical spelling "varFile1=" is kept for compatibility
    try:
        opts, args = getopt.getopt(argv, "a:p:r:m:M:n:t:o:V:W:L:h", [
            "analysis=", "place=", "folder=", "relfile=", "modfile=",
            "modfile1=", "nonmodfile=", "pep2pro=", "outputfile=", "varfile=",
            "varfile1=", "varFile1=", "infofile=", "help"
        ])
    except getopt.GetoptError as parsingError:
        # infoFile is only assigned while looping over the parsed options, so
        # it is still empty here: report on the console instead of trying to
        # save a log under an empty file name
        logList.append(["Error while getting parameters."])
        print("Error while getting parameters.")
        print(str(parsingError))
        sys.exit(2)

    if len(opts) == 0:
        printHelp(version)
        sys.exit()

    for opt, arg in opts:
        if opt in ("-a", "--analysis"):
            analysisName = arg
        elif opt in ("-p", "--place", "--folder"):
            analysisFolder = arg
        elif opt in ("-m", "--modfile"):
            modifiedPeptidesFile = arg
        elif opt in ("-M", "--modfile1"):
            modifiedPeptidesFile1 = arg
        elif opt in ("-n", "--nonmodfile"):
            nonModifiedPep2ProtFile = arg
        elif opt in ("-t", "--pep2pro"):
            pep2protein = arg
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-r", "--relfile"):
            relationsFile = arg
        elif opt in ("-V", "--varfile"):
            varFile = arg
        elif opt in ("-W", "--varfile1", "--varFile1"):
            varFile1 = arg
        elif opt in ("-L", "--infofile"):
            infoFile = arg
        elif opt in ("-h", "--help"):
            # help is informational only: do not go on to run the analysis
            printHelp(version)
            sys.exit()

    # REGION: FILE NAMES SETUP

    # the analysis name falls back to the first modified peptides file, then
    # to the second one, and only then to the default name
    if len(analysisName) == 0:
        if len(modifiedPeptidesFile) > 0:
            analysisName = os.path.splitext(
                os.path.basename(modifiedPeptidesFile))[0]
        elif len(modifiedPeptidesFile1) > 0:
            analysisName = os.path.splitext(
                os.path.basename(modifiedPeptidesFile1))[0]
        else:
            analysisName = defaultAnalysisName

    # an analysis name given with a path also provides the folder, unless
    # a folder was given explicitly
    if len(os.path.dirname(analysisName)) > 0:
        analysisNameFirstPart = os.path.dirname(analysisName)
        analysisName = os.path.basename(analysisName)
        if len(analysisFolder) == 0:
            analysisFolder = analysisNameFirstPart

    # last resort for the folder: the folder of the first modified peptides
    # file that carries one
    for candidateFile in (modifiedPeptidesFile, modifiedPeptidesFile1):
        if len(analysisFolder) == 0:
            analysisFolder = os.path.dirname(candidateFile)

    def defaultPath(defaultStem):
        # <folder>/<analysis>_<stem>.xls, used when a file was not given
        return os.path.join(
            analysisFolder,
            analysisName + "_" + defaultStem + defaultTableExtension)

    def inFolder(path):
        # put a bare file name into the analysis folder; empty paths and
        # paths that already have a directory part are left untouched
        if len(path) > 0 and len(os.path.dirname(path)) == 0:
            return os.path.join(analysisFolder, path)
        return path

    # input
    modifiedPeptidesFile = inFolder(
        modifiedPeptidesFile or defaultPath(defaultModifiedPeptidesFile))
    modifiedPeptidesFile1 = inFolder(
        modifiedPeptidesFile1 or defaultPath(defaultModifiedPeptidesFile1))
    nonModifiedPep2ProtFile = inFolder(
        nonModifiedPep2ProtFile or defaultPath(defaultnonModifiedPeptidesFile))
    relationsFile = inFolder(
        relationsFile or defaultPath(defaultRelationsFile))
    pep2protein = inFolder(pep2protein or defaultPath(defaultpep2protein))

    # the variance files have no default name: they stay empty when not given
    varFile = inFolder(varFile)
    varFile1 = inFolder(varFile1)

    # output
    outputFile = inFolder(outputFile or defaultPath(defaultOutput))

    # NOTE(review): the default info file gets the table extension (.xls);
    # confirm whether a text extension was intended. A user-given info file
    # name is used as typed, without prepending the analysis folder.
    if len(infoFile) == 0:
        infoFile = defaultPath(defaultOutputInfo)

    logList.append(["Input modifiedPeptideFile " + str(modifiedPeptidesFile)])
    logList.append(
        ["Input modifiedPeptideFile1 " + str(modifiedPeptidesFile1)])
    logList.append(
        ["Input nonMod_PepTOpro_File " + str(nonModifiedPep2ProtFile)])
    logList.append(["Input pep to protein file " + str(pep2protein)])
    logList.append(["Input relations file: " + relationsFile])
    logList.append(["Input varianceFile: " + varFile])
    logList.append(["Input Second_varianceFile: " + varFile1])
    logList.append(["Output stats file: " + outputFile])
    logList.append(["Output info file: " + infoFile])

    # END REGION: FILE NAMES SETUP

    outputList = ZpCalculator(relationsFile=relationsFile,
                              modifiedPeptidesFile=modifiedPeptidesFile,
                              modifiedPeptidesFile1=modifiedPeptidesFile1,
                              nonModifiedPep2ProtFile=nonModifiedPep2ProtFile,
                              varFile=varFile,
                              varFile1=varFile1,
                              pep2protein=pep2protein,
                              outname=outputFile)

    # *** the header still has to be fixed for Xp and Vp
    header = "idsup\tXsup\tVsup\tidinf\tXinf\tVinf\tn\tZ\tFDR"
    stats.saveFile(outputFile, outputList, header)

    if len(infoFile) > 0:
        stats.saveFile(infoFile, logList, "INFO FILE")