def test_proteomeStatistics():
    """Check the feature table produced by ``Summary.proteomeStatistics``.

    The summary is run on the test annotation and the two expression
    files, and ``proteome_features.tsv`` (read back from the working
    directory) is inspected. The output file is deleted in a ``finally``
    clause so a failing assertion does not leave it behind.
    """
    output = 'proteome_features.tsv'
    s = summary.Summary(dataPath + 'annotation.pklz')

    try:
        s.proteomeStatistics(dataPath + "expression",
                             dataPath + "expression_case")
        proteome = list(io.readTable(output))

        def extra_features(transcript):
            # Rows of a transcript other than its own 'Transcript' row.
            return [
                x for x in proteome
                if x['Transcript'] == transcript
                and x['Feature_type'] != 'Transcript'
            ]

        assert len(proteome) == 20

        # Every transcript must be reported at most once.
        txs = [
            x['Transcript'] for x in proteome
            if x['Feature_type'] == 'Transcript'
        ]
        assert len(txs) == len(set(txs))

        # ENSG00.5 is complete, as it only lacks information of its two
        # aberrant transcripts
        assert [x for x in proteome if x['GeneId'] == 'ENSG00.5']

        assert len([x for x in proteome if x['Feature_type'] == 'Prosite']) == 3
        assert len([x for x in proteome if x['Feature_type'] == 'Pfam']) == 3

        # ENST08.1 goes first in the file, and both have same median
        assert len(extra_features('ENST08.1')) == 2
        assert len([x for x in proteome if x['Transcript'] == 'ENST09.1']) == 0
        assert len(extra_features('ENST18.3')) == 1
    finally:
        # Guarded so a missing file does not mask the real failure.
        if os.path.exists(output):
            os.remove(output)
def getDomainInteractions(self, ddi):
    """Add isoform-isoform edges supported by domain-domain interactions.

    Args:
        ddi: Path to a table with ``Pfam1`` and ``Pfam2`` columns. When
            falsy, nothing is read: a new network (``self._new``) raises,
            an existing one keeps its current interactions.

    Raises:
        SpadaError: If the network is new and no ``ddi`` file was given.
    """
    if not ddi:
        if self._new:
            raise SpadaError(
                "A file containing the domain-domain interactions must be provided."
            )
        self.logger.info(
            "Domain-domain interactions from the provided network will be used."
        )
        return

    self.logger.info("Building isoform-isoform interaction network.")

    # Unordered Pfam pairs, so (A, B) and (B, A) are the same interaction.
    known_pairs = set()
    for row in io.readTable(ddi, keys=['Pfam1', 'Pfam2']):
        known_pairs.add(frozenset([row['Pfam1'], row['Pfam2']]))

    txs_by_gene = io.getGene2Tx(self._txs)

    for gene_a, gene_b in self._genes._net.edges():
        isoforms_a = txs_by_gene.get(gene_a, set())
        isoforms_b = txs_by_gene.get(gene_b, set())

        for tx_a, tx_b in product(isoforms_a, isoforms_b):
            domains_a = self._txs[tx_a]["Pfam"]
            domains_b = self._txs[tx_b]["Pfam"]
            candidates = {
                frozenset(pair) for pair in product(domains_a, domains_b)
            }

            shared = candidates & known_pairs
            if shared:
                self._txs.add_edge(tx_a, tx_b, ddi=shared)
def test_printSwitches():
    """Check that ``io.printSwitches`` writes the expected number of rows.

    Switches are computed from the test annotation, printed, and read
    back from ``switches_spada.tsv``. The file is deleted in a ``finally``
    clause so a failing assertion does not leave it behind.
    """
    output = "switches_spada.tsv"
    g = get_switches.GetSwitches(dataPath + 'annotation.pklz')

    try:
        g.run(dataPath + 'switches')
        io.printSwitches(g._genes, g._txs)

        switches = list(io.readTable(output))
        assert len(switches) == 8
    finally:
        # Guarded so a missing file does not mask the real failure.
        if os.path.exists(output):
            os.remove(output)
def test_ppiAnalysis():
    """Check the rows written by ``StructuralAnalysis.ppiAnalysis``.

    Uses the module-level ``g`` networks, runs the PPI analysis and reads
    ``ppi_analysis.tsv`` back. The file is deleted in a ``finally``
    clause so a failing assertion does not leave it behind.
    """
    output = "ppi_analysis.tsv"
    # Expected number of rows per value of the 'What' column.
    expected_counts = {
        "Unaffected": 2,
        "Affected": 2,
        "Lost_in_cases": 1,
        "Gained_in_cases": 1,
    }
    s = structural_analysis.StructuralAnalysis((g._genes, g._txs))

    try:
        s.ppiAnalysis()

        ddi = list(io.readTable(output))
        assert len(ddi) == 6

        for what, expected in expected_counts.items():
            observed = len([x for x in ddi if x['What'] == what])
            assert observed == expected, what
    finally:
        # Guarded so a missing file does not mask the real failure.
        if os.path.exists(output):
            os.remove(output)
def addAberrant(self, aberrant):
    """Add transcripts listed in ``aberrant`` as non-canonical nodes.

    Args:
        aberrant: Path to a table with ``GeneId`` and ``Transcript``
            columns. When falsy, the method does nothing.

    While the table is imported ``self._txs.skip_filter`` is set to
    ``True``. The previous value is restored in a ``finally`` clause, so
    an error while reading the table or adding a node cannot leave the
    filter switched off for later calls.
    """
    if not aberrant:
        return

    self.logger.info("Import aberrant isoforms absent in GTF.")

    # NOTE(review): presumably skip_filter lets add_node accept transcripts
    # that would otherwise be filtered out - confirm against the network class.
    previous = self._txs.skip_filter
    self._txs.skip_filter = True
    try:
        for line in io.readTable(aberrant, keys=['GeneId', 'Transcript']):
            self._txs.add_node(line['Transcript'], line['GeneId'])
            self._txs.update_node(line["Transcript"], "canonical", False)
    finally:
        self._txs.skip_filter = previous
def readSwitches(self, switchesFile, tx_network):
    """Load previously calculated isoform switches from a table.

    Each row of ``switchesFile`` must provide ``GeneId``,
    ``Control_transcript``, ``Case_transcript`` and a comma-separated
    ``Samples`` field. Rows accepted by ``self.valid_switch`` are wrapped
    in a ``LiteSwitch`` and handed to ``self.update_node`` under
    ``"switches"`` for the row's gene; the other rows are skipped.

    Args:
        switchesFile: Path of the table to read.
        tx_network: Passed through to ``self.valid_switch``.
    """
    self.logger.debug("Retrieving calculated isoform switches.")

    for row in io.readTable(switchesFile):
        gene_id = row['GeneId']
        control_tx = row['Control_transcript']
        case_tx = row['Case_transcript']
        sample_ids = set(row['Samples'].split(','))

        if not self.valid_switch(gene_id, control_tx, case_tx, tx_network):
            continue

        switch = LiteSwitch(control_tx, case_tx, sample_ids)
        self.update_node("switches", switch, full_name=gene_id)
def getIsoformFeatures(self, features):
    """Annotate transcripts with the protein features listed in a table.

    Args:
        features: Path to a table with ``Transcript``, ``Feature_type``,
            ``Feature``, ``Start`` and ``End`` columns. When falsy,
            nothing is read: a new network (``self._new``) raises, an
            existing one keeps its current features.

    Raises:
        SpadaError: If the network is new and no ``features`` file was
            given.
    """
    if not features:
        if self._new:
            raise SpadaError(
                "A file with the protein features must be provided.")
        self.logger.info(
            "Protein features from the provided network will be used.")
        return

    self.logger.info("Reading isoform features.")

    columns = ['Transcript', 'Feature_type', 'Feature', 'Start', 'End']
    for row in io.readTable(features, keys=columns):
        tx_id, kind, name = row['Transcript'], row['Feature_type'], row['Feature']
        # Coordinates arrive as text and are stored as an integer pair.
        span = (int(row['Start']), int(row['End']))
        self._txs.update_node(tx_id, kind, span, name)
def test_featureAnalysis():
    """Check the tables written by ``StructuralAnalysis.featureAnalysis``.

    Uses the module-level ``g`` networks, runs the feature analysis and
    inspects ``pfam_analysis.tsv``, ``prosite_analysis.tsv`` and
    ``idr_analysis.tsv``. All three files are deleted in a ``finally``
    clause so a failing assertion does not leave them behind.
    """
    outputs = ("pfam_analysis.tsv", "prosite_analysis.tsv", "idr_analysis.tsv")

    def matching(rows, **fields):
        # Rows whose columns equal every given field value.
        return [
            row for row in rows
            if all(row[key] == value for key, value in fields.items())
        ]

    # (Control_transcript, Case_transcript, What, Feature)
    expected_pfam = [
        ("ENST01.2", "ENST02.2", "Nothing", "D1"),
        ("ENST01.2", "ENST02.2", "Lost_in_cases", "D1"),
        ("ENST01.2", "ENST02.2", "Gained_in_cases", "D4"),
        ("ENST01.2", "ENST02.2", "Gained_in_cases", "D2"),
        ("ENST02.2", "ENST01.2", "Nothing", "D1"),
        ("ENST02.2", "ENST01.2", "Gained_in_cases", "D1"),
        ("ENST02.2", "ENST01.2", "Lost_in_cases", "D4"),
        ("ENST02.2", "ENST01.2", "Lost_in_cases", "D2"),
    ]
    expected_prosite = [
        ("ENST08.1", "ENST09.1", "Nothing", "P1"),
        ("ENST08.1", "ENST09.1", "Lost_in_cases", "P1"),
        ("ENST08.1", "ENST09.1", "Gained_in_cases", "P2"),
    ]

    s = structural_analysis.StructuralAnalysis((g._genes, g._txs))

    try:
        s.featureAnalysis()

        # pfams
        pfam = list(io.readTable("pfam_analysis.tsv"))
        assert len(pfam) == 8
        for ctrl, case, what, feature in expected_pfam:
            assert matching(
                pfam,
                Control_transcript=ctrl,
                Case_transcript=case,
                What=what,
                Feature=feature,
            ), (ctrl, case, what, feature)

        # prosite
        prosite = list(io.readTable("prosite_analysis.tsv"))
        assert len(prosite) == 3
        for ctrl, case, what, feature in expected_prosite:
            assert matching(
                prosite,
                Control_transcript=ctrl,
                Case_transcript=case,
                What=what,
                Feature=feature,
            ), (ctrl, case, what, feature)

        # idr
        idr = list(io.readTable("idr_analysis.tsv"))
        assert len(idr) == 6
        assert matching(
            idr,
            Control_transcript="ENST16.2",
            Case_transcript="ENST14.5",
            What="Lost_in_cases",
            Sequence="ABcd",
        )
        assert len(matching(
            idr,
            Control_transcript="ENST16.2",
            Case_transcript="ENST14.5",
        )) == 1
    finally:
        # Guarded so a missing file does not mask the real failure.
        for path in outputs:
            if os.path.exists(path):
                os.remove(path)