Python MutUtils.extractProteinPosition示例

编程语言: Python

命名空间/包名称: oncotator.utils.MutUtils

类/类型: MutUtils

方法/功能: extractProteinPosition

hotexamples.com的示例: 3

Python MutUtils.extractProteinPosition - 已找到3个示例。这些是从开源项目中提取的、最受好评的 oncotator.utils.MutUtils.MutUtils.extractProteinPosition 真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

removeDir(7)

convertChromosomeStringToMutationDataFormat(6)

getAllAttributeNames(5)

initializeMutFromAttributes(4)

validateMutation(4)

translate_sequence(3)

str2bool(3)

createFieldsMapping(3)

extractProteinPosition(2)

retrievePrecedingBasesForDeletions(2)

retrievePrecedingBasesForInsertions(2)

replaceChrs(2)

retrieveMissingAnnotations(2)

createChrom2HashCodeTable(2)

create_variant_key_by_mutation(2)

getTokens(1)

retrievePrecedingBaseFromAnnotationForDeletions(1)

retrievePrecedingBaseFromAnnotationForInsertions(1)

retrievePrecedingBaseFromReference(1)

get_all_annotation_names(1)

getUnknownAnnotations(1)

retrieveMutCoordinatesForRendering(1)

示例#1

显示文件

文件： MutUtilsTest.py 项目： Tmacme/oncotator

    def testProteinChange(self):
        """Check that protein change strings are parsed into start/end positions.

        Each case pairs a protein change string with the [start, end] values
        (as strings) that MutUtils.extractProteinPosition is expected to
        return for it.  A string that is not a protein change is expected to
        produce ['', ''].
        """
        # Each tuple is (input string, ground truth [start, end]).
        testInOuts = [
            ("p.K128_R130del", ['128', '130']),
            ("p.W274G", ["274", "274"]),
            ("p.13_14AA>A", ["13", "14"]),
            ("p.G25_splice", ["25", "25"]),
            ("p.E813*", ["813", "813"]),
            ("p.SLPQPEQRPY59del", ["59", "59"]),
        ]

        for proteinChange, groundTruth in testInOuts:
            result = MutUtils.extractProteinPosition(proteinChange)

            # An empty pair is the "could not parse" result, so check for it
            # first to get a clearer failure than a plain mismatch.
            self.assertNotEqual(result, ['', ''], "Result was empty.  " + str(proteinChange) + ".  ")

            mismatchMsg = "Result did not match for " + str(proteinChange) + ".  " + str(result) + "  GT: " + str(groundTruth)
            self.assertEqual(result[0], groundTruth[0], mismatchMsg)
            self.assertEqual(result[1], groundTruth[1], mismatchMsg)

        # Input that is not a protein change must give the empty pair.
        self.assertEqual(MutUtils.extractProteinPosition("blahblah"), ['', ''])

示例#2

显示文件

    def testProteinChange(self):
        """Verify start/end extraction from protein change strings.

        Every entry of the table below is (input, expected), where expected
        is the [start, end] pair of position strings that
        MutUtils.extractProteinPosition should return.  Unparseable input is
        expected to come back as ['', ''].
        """
        emptyResult = ['', '']

        # Each tuple is test, ground truth
        testInOuts = [("p.K128_R130del", ['128', '130']),
                      ("p.W274G", ["274", "274"]),
                      ("p.13_14AA>A", ["13", "14"]),
                      ("p.G25_splice", ["25", "25"]),
                      ("p.E813*", ["813", "813"]),
                      ("p.SLPQPEQRPY59del", ["59", "59"])]

        for proteinChange, (expectedStart, expectedEnd) in testInOuts:
            result = MutUtils.extractProteinPosition(proteinChange)
            self.assertNotEqual(
                result, emptyResult,
                "Result was empty.  " + str(proteinChange) + ".  ")

            # Same message for both positions so a failure always shows the
            # full parsed result next to the full ground truth.
            mismatchMsg = ("Result did not match for " + str(proteinChange) +
                           ".  " + str(result) + "  GT: " +
                           str([expectedStart, expectedEnd]))
            self.assertEqual(result[0], expectedStart, mismatchMsg)
            self.assertEqual(result[1], expectedEnd, mismatchMsg)

        # A string that is not a protein change yields the empty pair.
        self.assertEqual(
            MutUtils.extractProteinPosition("blahblah"), emptyResult)

示例#3

显示文件

文件： TabixIndexer.py 项目： ihuerga/oncotator

    def indexGeneProteinPosition(geneColumn, proteinInfoColumn, inputFilename, outputFilename):
        """
        Creates an intermediate temporary file that includes two additional columns, startAA and endAA,
        sorts the file, writes the sorted file to outputFilename, and then indexes the sorted file.

        Rows whose protein column is empty, or for which no start/end position can be extracted, are
        left out of the intermediate (and therefore the sorted) file.

        :param geneColumn: name of the gene column in the inputFilename
        :param proteinInfoColumn: name of the protein change or position column. Can be of formats: p.K128_R130del
        (position 128 through 130) For more examples, see MutUtilsTest.testProteinChange()
        :param inputFilename: input tsv filename
        :param outputFilename: output filename
        :return: the value returned by TabixIndexer.index for the sorted output file
        """
        startAACol = "startAA"
        endAACol = "endAA"

        # Create intermediate file.  Do not use '#' for comments, since header can start with '#'
        tsvReader = GenericTsvReader(inputFilename, commentPrepend=";")

        # These are the outputHeaders for the intermediate file.
        headers = tsvReader.getFieldNames()

        if startAACol not in headers:
            headers += [startAACol]
        if endAACol not in headers:
            headers += [endAACol]

        # Write to an intermediate temporary file.  NamedTemporaryFile removes the file when it is
        # closed, so it has to stay open until the sorter has finished reading it by name.
        temp = tempfile.NamedTemporaryFile()
        try:
            csvfile = open(temp.name, 'w')
            try:
                # Initialize the intermediate file's header.
                tsvWriter = csv.DictWriter(csvfile, headers, delimiter='\t', lineterminator='\n')
                # If the headers have a leading '#', get rid of it.
                # NOTE(review): replace() drops every '#' in such a header, not only the leading one,
                # and the writer was handed this same list, so it sees the stripped names -- confirm
                # that the row keys produced by GenericTsvReader match them.
                for i, header in enumerate(headers):
                    if header.startswith("#"):
                        headers[i] = header.replace("#", "")
                tsvWriter.writeheader()

                # Get indices of relevant columns.
                gene_i = headers.index(geneColumn)
                startAA_i = headers.index(startAACol)
                endAA_i = headers.index(endAACol)

                # Write each line of the intermediate file.
                for row in tsvReader:
                    protein = row[proteinInfoColumn]
                    if protein is None or not protein.strip():
                        continue
                    [startAA, endAA] = MutUtils.extractProteinPosition(protein)
                    if not startAA.strip() or not endAA.strip():
                        continue
                    row[startAACol] = startAA
                    row[endAACol] = endAA
                    tsvWriter.writerow(row)
            finally:
                # close() flushes, and must run even if a row fails so the handle is not leaked.
                csvfile.close()

            # Sort the intermediate tsv file by (gene, start, end).  The key uses the caller's gene
            # column rather than a fixed "Gene name", so inputs with a differently named gene column
            # are sorted on the same column that is indexed below.
            tsvSorter = TsvFileSorter(temp.name)
            func = lambda val: ((val[geneColumn]).lower(), int(val[startAACol]), int(val[endAACol]))

            # Use the whole file path name.
            outputFilename = os.path.abspath(outputFilename)
            tsvSorter.sortFile(outputFilename, func)
        finally:
            # Closing the temporary file also deletes it.
            temp.close()

        return TabixIndexer.index(destDir=os.path.dirname(os.path.abspath(outputFilename)),
                                  inputFilename=outputFilename, fileColumnNumList=[gene_i, startAA_i, endAA_i])