Exemplos de getIsolatedParentDir em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: mutperiodpy.helper_scripts.UsefulFileSystemFunctions

Método / Função: getIsolatedParentDir

Exemplos em hotexamples.com: 2

getIsolatedParentDir em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de mutperiodpy.helper_scripts.UsefulFileSystemFunctions.getIsolatedParentDir em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

def parseCustomBed(bedInputFilePaths, genomeFilePath, stratifyByMS, stratifyByMutSig, separateIndividualCohorts,
                   onlySingleBaseSubs=False, includeIndels=False):
    """
    Parse each custom bed file in bedInputFilePaths into the standard input format.

    For every input file this function resolves the data directory, generates metadata
    (unless the file lives in an "intermediate_files" directory), runs the auto-acquire/QA
    step, sorts the input file in place with the external "sort" command, sets up any
    requested stratification on the write manager, and finally converts the file.

    bedInputFilePaths: Sized collection of paths to the custom bed files to parse.
    genomeFilePath: Path to the genome file; forwarded to the metadata and auto-acquire steps.
    stratifyByMS: If truthy, set up stratification by microsatellite stability.
    stratifyByMutSig: If truthy, set up stratification by mutation signature.
    separateIndividualCohorts: If truthy, prepare the write manager for individual cohorts.
    onlySingleBaseSubs: Forwarded to the QA and conversion steps; incompatible with includeIndels.
    includeIndels: Forwarded to the QA and conversion steps; incompatible with onlySingleBaseSubs.

    Raises UserInputError if both onlySingleBaseSubs and includeIndels are set, if no input
    files are given, or if stratification/cohort separation is requested for a file whose
    first line does not have exactly 7 tab-separated columns (no cohort designation).
    """

    if onlySingleBaseSubs and includeIndels:
        raise UserInputError("Indels are incompatible with single nucleotide substitutions.")
    if len(bedInputFilePaths) < 1:
        raise UserInputError("No bed files were found to parse.")

    for bedInputFilePath in bedInputFilePaths:

        inputFileName = os.path.basename(bedInputFilePath)
        print("\nWorking in:", inputFileName)

        # Resolve the data group's root directory. A file inside "intermediate_files" sits one
        # level below that root, and its metadata should already have been generated elsewhere.
        inputFileDirectory = os.path.dirname(bedInputFilePath)
        if getIsolatedParentDir(bedInputFilePath) != "intermediate_files":
            dataDirectory = inputFileDirectory
            generateMetadata(os.path.basename(dataDirectory), getIsolatedParentDir(genomeFilePath),
                             inputFileName, InputFormat.customBed, inputFileDirectory)
        else:
            dataDirectory = os.path.dirname(inputFileDirectory)

        dataGroupName = os.path.basename(dataDirectory)

        intermediateFilesDir = os.path.join(dataDirectory, "intermediate_files")
        checkDirs(intermediateFilesDir)
        autoAcquiredFilePath = os.path.join(intermediateFilesDir, "auto_acquire.fa")

        context = autoAcquireAndQACheck(bedInputFilePath, genomeFilePath, autoAcquiredFilePath,
                                        onlySingleBaseSubs, includeIndels)

        # The input must not share a path with the file about to be written. If it does, read
        # from a copy placed in intermediate_files while the new output file is being written.
        expectedOutputFilePath = generateFilePath(directory=dataDirectory, dataGroup=dataGroupName,
                                                  context=context, dataType=DataTypeStr.mutations,
                                                  fileExtension=".bed")
        workingFilePath = bedInputFilePath
        if workingFilePath == expectedOutputFilePath:
            inputFilePathCopy = os.path.join(intermediateFilesDir, inputFileName)
            print("Input file path is identical to generated output file path and will be overwritten. ",
                  "Creating a copy of the input file at:", inputFilePathCopy, "to use for reading.")
            shutil.copy2(workingFilePath, inputFilePathCopy)
            workingFilePath = inputFilePathCopy

        # The WriteManager handles all of the writing from here on.
        with WriteManager(dataDirectory, context) as writeManager:

            # Peek at the first line: a 7th tab-separated column is the cohort designation.
            with open(workingFilePath, 'r') as bedInputFile:
                firstLine = bedInputFile.readline()
            hasCohortDesignation = len(firstLine.strip().split('\t')) == 7

            if hasCohortDesignation:
                # The cohort column becomes the leading sort key.
                cohortSortKey = ("-k7,7",)
                if separateIndividualCohorts:
                    writeManager.setUpForIndividualCohorts()
            else:
                cohortSortKey = ()
                if stratifyByMS or stratifyByMutSig:
                    raise UserInputError("Additional stratification given, but no cohort designation given.")
                if separateIndividualCohorts:
                    raise UserInputError("Separation by individual cohorts requested, but no cohort designation given.")

            # Sort the input data in place (should also ensure that the output data is sorted).
            sortCommand = ("sort", *cohortSortKey, "-k1,1", "-k2,2n", "-k3,3n",
                           workingFilePath, "-s", "-o", workingFilePath)
            subprocess.run(sortCommand, check=True)

            # Optional stratification set-up, then the actual conversion.
            if stratifyByMS:
                setUpForMSStratification(writeManager, workingFilePath)
            if stratifyByMutSig:
                setUpForMutSigStratification(writeManager, workingFilePath)

            convertToStandardInput(workingFilePath, writeManager, onlySingleBaseSubs, includeIndels)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: ParseKucabCompendium.py Projeto: bmorledge-hampton19/mutperiod

def parseKucabCompendium(kucabSubstitutionsFilePaths: List[str], genomeFilePath, nucPosFilePath, includeAllPAHs):
    """
    Convert Kucab compendium substitution files into sorted trinucleotide-context bed files.

    For each input file, the rows whose mutagen designation (column index 15) belongs to the
    selected designation set are written to a new bed file as:
    chromosome, 0-based start, 1-based position, trinucleotide context, mutation, strand.
    Mutations listed as arising from a purine are reverse-complemented and put on the '-' strand.
    The output file is then sorted in place with the external "sort" command.

    kucabSubstitutionsFilePaths: Paths to the input files; each must end in "final.txt".
    genomeFilePath: Path to the genome file; used for metadata and to get acceptable chromosomes.
    nucPosFilePath: Path to the nucleosome positions file; only used when generating metadata.
    includeAllPAHs: If truthy, keep all PAH designations and write under "all_PAHs";
        otherwise keep only the lung-cancer-specific designations and write under "smoker_lung".

    Raises InvalidPathError if an input file path does not end in "final.txt".
    """

    # These are the designations for PAH mutation signatures, the ones related to tobacco smoke that we want to study.
    # NOTE: Every entry needs its own comma. Adjacent string literals are concatenated by Python,
    #       which previously merged "MSM0.103" and "MSM0.14" into one entry that never matched.
    PAHDesignations = ("MSM0.54", "MSM0.26", "MSM0.92", "MSM0.2", "MSM0.42", "MSM0.74", "MSM0.103",
                       "MSM0.14", "MSM0.82", "MSM0.130", "MSM0.12", "MSM0.132", "MSM0.13", "MSM0.96")

    # These designations specifically mimic the indel signature in smokers' lung cancer tumors.
    LungCancerSpecificDesignations = ("MSM0.26", "MSM0.92", "MSM0.2", "MSM0.103", "MSM0.14")

    # Set the designations that will be used to collect data based on the input to the function.
    relevantDesignations = PAHDesignations if includeAllPAHs else LungCancerSpecificDesignations

    for kucabSubstitutionsFilePath in kucabSubstitutionsFilePaths:

        print("\nWorking in:", os.path.basename(kucabSubstitutionsFilePath))

        if not kucabSubstitutionsFilePath.endswith("final.txt"):
            raise InvalidPathError(kucabSubstitutionsFilePath,
                                   "Given kucab input file does not end in \"final.txt\":")

        # Prepare the output file path.
        localRootDirectory = os.path.dirname(kucabSubstitutionsFilePath)
        dataGroupName = getIsolatedParentDir(kucabSubstitutionsFilePath)
        if includeAllPAHs:
            outputDirectory = os.path.join(localRootDirectory, "all_PAHs")
            dataGroupName += "_all_PAHs"
        else:
            outputDirectory = os.path.join(localRootDirectory, "smoker_lung")
            dataGroupName += "_smoker_lung"

        # Make sure the data directory exists (no check-then-create race).
        os.makedirs(outputDirectory, exist_ok=True)

        # Generate the output file path and metadata
        outputTrinucBedFilePath = generateFilePath(directory=outputDirectory, dataGroup=dataGroupName,
                                                   context="trinuc", dataType=DataTypeStr.mutations,
                                                   fileExtension=".bed")
        generateMetadata(dataGroupName, getIsolatedParentDir(genomeFilePath), getIsolatedParentDir(nucPosFilePath),
                         os.path.join("..", os.path.basename(kucabSubstitutionsFilePath)), outputDirectory)

        # Get the list of acceptable chromosomes
        acceptableChromosomes = getAcceptableChromosomes(genomeFilePath)

        print("Reading data and writing to trinuc bed file...")
        with open(kucabSubstitutionsFilePath, 'r') as kucabSubstitutionsFile, \
             open(outputTrinucBedFilePath, 'w') as outputTrinucBedFile:

            # Skip the first line with headers.
            next(kucabSubstitutionsFile, None)

            for line in kucabSubstitutionsFile:

                # Blank lines (e.g. a trailing empty line) carry no data.
                if not line.strip():
                    continue

                # The lines are separated by tabs.  The relevant data have the following indices in a tab-separated list:
                # 15: mutagen designation
                # 4: Chromosome
                # 5: Start Pos (1 base)
                # 6: Reference base
                # 7: Mutated base
                # 13: pre-base context
                # 14: post-base context
                choppedUpLine = line.strip().split('\t')

                # Skip the mutation if it does not belong to the relevant group.
                if choppedUpLine[15] not in relevantDesignations:
                    continue

                # Compile the necessary information for the bed file.
                chromosome = "chr" + choppedUpLine[4]

                # Handle the weird chromosome formatting and then check for invalid chromosomes.
                if chromosome == "chr23":
                    chromosome = "chrX"
                if chromosome == "chr24":
                    chromosome = "chrY"
                if chromosome not in acceptableChromosomes:
                    continue

                startPos1Base = choppedUpLine[5]
                startPos0Base = str(int(startPos1Base) - 1)

                mutatedFrom = choppedUpLine[6]
                mutatedTo = choppedUpLine[7]

                trinucContext = ''.join((choppedUpLine[13], mutatedFrom, choppedUpLine[14]))

                # If the mutated base is listed as arising from a purine, flip the mutation and the strand.
                if isPurine(mutatedFrom):
                    mutation = reverseCompliment(mutatedFrom) + '>' + reverseCompliment(mutatedTo)
                    strand = '-'
                    trinucContext = reverseCompliment(trinucContext)
                else:
                    mutation = mutatedFrom + '>' + mutatedTo
                    strand = '+'

                # Write the information to the trinuc bed file.
                outputTrinucBedFile.write('\t'.join((chromosome, startPos0Base, startPos1Base,
                                                     trinucContext, mutation, strand)) + '\n')

        # Sort the output file.
        print("Sorting output file...")
        subprocess.run(("sort", "-k1,1", "-k2,2n", outputTrinucBedFilePath, "-o", outputTrinucBedFilePath),
                       check=True)