Пример #1
0
 def run(self, go_sources, exp_gos, **kws):
     """Build a GoSubDag from the given sources and verify its GO IDs.

     A GoSubDag is created from ``go_sources`` and ``self.go2obj_all`` (any
     extra keyword arguments are forwarded), its GO IDs are printed, and the
     keys of its ``go2nt`` are asserted to equal ``exp_gos``.
     """
     print("\nSRCS: {GOs}".format(GOs=go_sources))
     subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
     go2nt = subdag.go2nt
     subdag.prt_goids(go2nt)
     actual_gos = set(go2nt)
     assert actual_gos == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
         sorted(go2nt), sorted(exp_gos), go_sources, kws)
def test_nb():
    """Run the notebook example: report dcnt for virion and plot a small subset.

    Three GoSubDag objects are built from the leaf terms of the GO DAG: one
    without the relationships keyword, one with relationships=True, and one
    with only 'part_of'. For each, the go2nt entry of the virion term and its
    dcnt are printed. Two virion descendants are then printed and plotted.
    """
    godag = get_godag("go-basic.obo", optional_attrs={'relationship'})
    # Terms without children are the sources for the three sub-DAGs below
    leaf_goids = {term.item_id for term in godag.values() if not term.children}
    virion = 'GO:0019012'

    # Sub-DAG built without the relationships keyword
    subdag_r0 = GoSubDag(leaf_goids, godag)
    nt_r0 = subdag_r0.go2nt[virion]
    print(nt_r0)
    print('r0 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_r0.dcnt))

    # Sub-DAG built with relationships=True
    subdag_r1 = GoSubDag(leaf_goids, godag, relationships=True)
    nt_r1 = subdag_r1.go2nt[virion]
    print(nt_r1)
    print('r1 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_r1.dcnt))

    # Sub-DAG restricted to the 'part_of' relationship
    subdag_partof = GoSubDag(leaf_goids, godag, relationships={'part_of'})
    nt_partof = subdag_partof.go2nt[virion]
    print(nt_partof)
    print('THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_partof.dcnt))
    descendants = subdag_partof.rcntobj.go2descendants[virion]
    num_descendants = len(descendants)
    print('{N} descendants of virion were found'.format(N=num_descendants))

    # Print only two of the descendants so the plot below stays small
    capsid_fiber = {'GO:0098033', 'GO:0098032'}
    fmt = '{NS} {GO} dcnt({dcnt}) D-{depth:02} {GO_name}'
    subdag_partof.prt_goids(capsid_fiber, fmt)

    # Plot a sub-DAG whose sources are just those two virion descendants
    plot_subdag = GoSubDag(capsid_fiber, godag, relationships={'part_of'})
    GoSubDagPlot(plot_subdag).plt_dag('virion_capsid_fiber.png')
Пример #3
0
class Run(object):
    """Holds a GO DAG, annotations and a full GoSubDag for the plotting test."""

    def __init__(self, obo, gaf, prt):
        """Load the obo file and the GAF annotations, then build the full GoSubDag.

        obo -- obo file name, joined onto REPO/../goatools/
        gaf -- value handed to dnld_gaf to obtain the annotation file
        prt -- stream passed to GoSubDag and used for this object's own output
        """
        self.prt = prt
        self.cwd = os.getcwd()
        # Gene Ontologies
        obo_path = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(obo_path)
        # Annotations
        gaf_file = dnld_gaf(gaf)
        print("GAF: {GAF}\n".format(GAF=gaf_file))
        self.gene2gos = read_gaf(gaf_file)
        self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
        # GoSubDag created with None as its GO sources
        subdag_kws = dict(tcntobj=self.tcntobj, prt=prt)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all, **subdag_kws)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print the GO IDs of the full GoSubDag to ``prt`` using the 'fmta' format."""
        full = self.gosubdag_all
        full.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Print and plot the GO IDs reachable from ``go_sources``.

        The image is written to ``fout_img`` inside the directory that was
        current when this object was created.
        """
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        full = self.gosubdag_all
        # Reuse the rcntobj and go2nt already held by the full GoSubDag
        subdag = GoSubDag(
            go_sources,
            full.go2obj,
            prt=self.prt,
            rcntobj=full.rcntobj,
            go2nt=full.go2nt)
        fmt = subdag.prt_attr['fmta']
        plotted_goids = GoSubDagPlot(subdag).get_goids_plt()
        self.prt.write("\n{N} GO IDs\n".format(N=len(plotted_goids)))
        subdag.prt_goids(plotted_goids, prtfmt=fmt, prt=self.prt)
        GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(
            os.path.join(self.cwd, fout_img))
Пример #4
0
class Run(object):
    """Print, plot and check GO IDs of GoSubDags built from an obo-loaded DAG."""

    def __init__(self, obo):
        """Load ``obo`` (relative to REPO) and build a GoSubDag with no sources."""
        obo_path = os.path.join(REPO, obo)
        self.go2obj_all = get_godag(obo_path)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print the GO IDs of the full GoSubDag to ``prt`` using the 'fmta' format."""
        full = self.gosubdag_all
        full.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the sub-DAG of ``go_sources`` into ``fout_img`` under REPO."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(
            os.path.join(REPO, fout_img))

    def run(self, go_sources, exp_gos, **kws):
        """Build a GoSubDag from ``go_sources`` and assert its GO IDs equal ``exp_gos``.

        Extra keyword arguments are forwarded to GoSubDag.
        """
        print("\nSRCS: {GOs}".format(GOs=go_sources))
        subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
        go2nt = subdag.go2nt
        subdag.prt_goids(go2nt)
        actual_gos = set(go2nt)
        assert actual_gos == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
            sorted(go2nt), sorted(exp_gos), go_sources, kws)
Пример #5
0
 def plt_goids(self, fout_img, go_sources):
     """Print and plot the GO IDs reachable from ``go_sources``.

     The sub-DAG reuses the rcntobj and go2nt of ``self.gosubdag_all``; the
     image is written to ``fout_img`` under ``self.cwd``.
     """
     # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
     full = self.gosubdag_all
     subdag = GoSubDag(
         go_sources,
         full.go2obj,
         prt=self.prt,
         rcntobj=full.rcntobj,
         go2nt=full.go2nt)
     fmt = subdag.prt_attr['fmta']
     plotted_goids = GoSubDagPlot(subdag).get_goids_plt()
     self.prt.write("\n{N} GO IDs\n".format(N=len(plotted_goids)))
     subdag.prt_goids(plotted_goids, prtfmt=fmt, prt=self.prt)
     GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(
         os.path.join(self.cwd, fout_img))
Пример #6
0
 def plt_goids(self, fout_img, go_sources):
     """Write the plotted GO IDs to ``self.prt`` and save the plot as an image.

     A GoSubDag for ``go_sources`` is built on top of the go2obj, rcntobj and
     go2nt held by ``self.gosubdag_all``. The image path is ``fout_img``
     joined onto ``self.cwd``.
     """
     # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
     base = self.gosubdag_all
     out = self.prt
     subdag = GoSubDag(go_sources, base.go2obj, prt=out,
                       rcntobj=base.rcntobj, go2nt=base.go2nt)
     fmt = subdag.prt_attr['fmta']
     goids = GoSubDagPlot(subdag).get_goids_plt()
     header = "\n{N} GO IDs\n".format(N=len(goids))
     self.prt.write(header)
     subdag.prt_goids(goids, prtfmt=fmt, prt=self.prt)
     plotter = GoSubDagPlot(subdag, mark_alt_id=True)
     img_path = os.path.join(self.cwd, fout_img)
     plotter.plt_dag(img_path)
Пример #7
0
class Run(object):
    """Print and plot GO IDs of GoSubDags built from an obo-loaded DAG."""

    def __init__(self, obo):
        """Remember the current directory, load ``obo`` and build the full GoSubDag."""
        self.cwd = os.getcwd()
        obo_path = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(obo_path)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print the GO IDs of the full GoSubDag to ``prt`` using the 'fmta' format."""
        full = self.gosubdag_all
        full.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the sub-DAG of ``go_sources`` into ``fout_img`` under ``self.cwd``."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(
            os.path.join(self.cwd, fout_img))
Пример #8
0
def test_semantic_i88():
    """Computing basic semantic similarities between GO terms.

    Loads the GO DAG and the arabidopsis (tair.gaf) associations, builds the
    term counts, and prints the semantic similarity, the information content,
    the Resnik score and the Lin score for a fixed pair of GO terms.
    """
    godag = obo_parser.GODag("go-basic.obo")
    # Every key of the DAG is a source; keys are not filtered down to those
    # that equal their term's own id.
    goids = set(godag.keys())
    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)

    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)

    # Now we can calculate the semantic distance and semantic similarity, as so:
    #       "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364'  # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707'  # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.
          format(GO1=go_id3, GO2=go_id4, VAL=sim))
    # Show the common ancestor next to the two compared terms
    gosubdag.prt_goids([go_root, go_id3, go_id4])

    # Calculate the information content
    go_id = "GO:0048364"
    infocontent = get_info_content(go_id, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id,
                                                           INFO=infocontent))

    # Resnik's similarity measure is defined as the information content of the most
    # informative common ancestor. That is, the most specific common parent-term in
    # the GO. Then we can calculate this as follows:
    #       "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252
    sim_r = resnik_sim(go_id3, go_id4, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(GO1=go_id3,
                                                                  GO2=go_id4,
                                                                  VAL=sim_r))

    # Lin similarity score (GO:0048364, GO:0044707) = -0.607721957763
    sim_l = lin_sim(go_id3, go_id4, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(GO1=go_id3,
                                                               GO2=go_id4,
                                                               VAL=sim_l))
Пример #9
0
class Run(object):
    """Load a GO DAG from an obo file, then print or plot GoSubDag GO IDs."""

    def __init__(self, obo):
        """Store the working directory, the loaded DAG and a source-less GoSubDag."""
        self.cwd = os.getcwd()
        fin_obo = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(fin_obo)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Write the GO IDs of ``self.gosubdag_all`` to ``prt`` in the 'fmta' format."""
        fmt = self.prtfmt
        self.gosubdag_all.prt_goids(prtfmt=fmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Save a plot of the ``go_sources`` sub-DAG as ``fout_img`` in ``self.cwd``."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        subdag = GoSubDag(go_sources, self.go2obj_all)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        img_path = os.path.join(self.cwd, fout_img)
        plotter.plt_dag(img_path)
Пример #10
0
def test_semantic_i88():
    """Computing basic semantic similarities between GO terms.

    Loads the GO DAG and the arabidopsis (tair.gaf) associations, builds the
    term counts, and prints the semantic similarity, the information content,
    the Resnik score and the Lin score for a fixed pair of GO terms.
    """
    godag = obo_parser.GODag("go-basic.obo")
    # Every key of the DAG is a source; keys are not filtered down to those
    # that equal their term's own id.
    goids = set(godag.keys())
    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)

    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)

    # Now we can calculate the semantic distance and semantic similarity, as so:
    #       "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364' # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707' # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim))
    # Show the common ancestor next to the two compared terms
    gosubdag.prt_goids([go_root, go_id3, go_id4])

    # Calculate the information content
    go_id = "GO:0048364"
    infocontent = get_info_content(go_id, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id, INFO=infocontent))

    # Resnik's similarity measure is defined as the information content of the most
    # informative common ancestor. That is, the most specific common parent-term in
    # the GO. Then we can calculate this as follows:
    #       "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252
    sim_r = resnik_sim(go_id3, go_id4, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_r))

    # Lin similarity score (GO:0048364, GO:0044707) = -0.607721957763
    sim_l = lin_sim(go_id3, go_id4, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_l))
Пример #11
0
class Run(object):
    """Bundle of GO DAG, annotations and full GoSubDag used by the plotting test."""

    def __init__(self, obo, gaf, prt):
        """Load the ontology and the annotations, then build the full GoSubDag.

        obo -- obo file name, joined onto REPO/../goatools/
        gaf -- value handed to dnld_gaf to obtain the annotation file
        prt -- stream passed to GoSubDag and used for this object's own output
        """
        self.prt = prt
        self.cwd = os.getcwd()
        # Gene Ontologies
        fin_obo = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(fin_obo)
        # Annotations
        fin_gaf = dnld_gaf(gaf)
        print("GAF: {GAF}\n".format(GAF=fin_gaf))
        self.gene2gos = read_gaf(fin_gaf)
        self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
        # GoSubDag created with None as its GO sources
        self.gosubdag_all = GoSubDag(
            None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Write the GO IDs of ``self.gosubdag_all`` to ``prt`` in the 'fmta' format."""
        fmt = self.prtfmt
        self.gosubdag_all.prt_goids(prtfmt=fmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Write the plotted GO IDs to ``self.prt`` and save the plot as an image.

        The image path is ``fout_img`` joined onto ``self.cwd``.
        """
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        base = self.gosubdag_all
        # The sub-DAG is handed the rcntobj and go2nt of the full GoSubDag
        subdag = GoSubDag(go_sources, base.go2obj, prt=self.prt,
                          rcntobj=base.rcntobj, go2nt=base.go2nt)
        fmt = subdag.prt_attr['fmta']
        goids = GoSubDagPlot(subdag).get_goids_plt()
        header = "\n{N} GO IDs\n".format(N=len(goids))
        self.prt.write(header)
        subdag.prt_goids(goids, prtfmt=fmt, prt=self.prt)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        img_path = os.path.join(self.cwd, fout_img)
        plotter.plt_dag(img_path)
def _wr_sub_obo(fout_obo, goid_chosen, godag_r1, fin_obo):
    """Write a subset obo file for one chosen GO ID and plot that GO ID.

    fout_obo    -- path of the obo file to write; also the stem of the two PNGs
    goid_chosen -- GO ID used as the single source of the subset DAG
    godag_r1    -- GO DAG; only entries whose key equals the term's item_id are kept
    fin_obo     -- obo file handed to WrSubObo.prt_goterms as the input
    """
    # Load GO-DAG: Load optional 'relationship'
    godag = {goid: term for goid, term in godag_r1.items() if goid == term.item_id}
    _prt_rtel_ctr(godag)
    rels_all = {
        'part_of', 'regulates', 'negatively_regulates', 'positively_regulates'
    }
    leaf_goids = {term.id for term in godag.values() if not term.children}
    subdag_leafs = GoSubDag(
        leaf_goids, godag, relationships=True, prt=sys.stdout)
    srcs_w_rels = _get_leafs_w_relsinhier(rels_all, subdag_leafs)
    subdag_leafs.prt_goids(srcs_w_rels)
    # Pick one of the GO IDs as a source for the subset DAG
    subdag_chosen = GoSubDag(
        {goid_chosen}, godag, relationships=True, prt=sys.stdout)
    goids_subset = set(subdag_chosen.go2obj.keys())
    with open(fout_obo, 'w') as prt:
        WrSubObo.prt_goterms(fin_obo, goids_subset, prt)
        num_written = len(goids_subset)
        print('{N} GO IDs WROTE: {OBO}'.format(N=num_written, OBO=fout_obo))
    # Plot obo subset: first with the -r flag, then without it
    pat_r1 = '{REPO}/scripts/go_plot.py {GO} -o {PNG} -r'
    pat_r0 = '{REPO}/scripts/go_plot.py {GO} -o {PNG}'
    for pattern, png_suffix in ((pat_r1, '_r1.png'), (pat_r0, '_r0.png')):
        system(
            pattern.format(REPO=REPO,
                           PNG=fout_obo.replace('.obo', png_suffix),
                           GO=goid_chosen))
    def plot_all(self, goids, name, prt=sys.stdout):
        """Plot ``goids`` three times: no relationships, 'part_of' only, and all.

        Each pass builds a GoSubDag from ``goids`` and ``self.go2obj``, writes
        its GO IDs to ``prt`` and saves a PNG under REPO whose file name
        contains ``name``. In the last plot, GO IDs absent from the first
        sub-DAG are given the color '#d5ffff'.
        """
        # Pass 1: relationships=False
        prt.write("\nCreate GoSubDag not loading any relationship")
        subdag_r0 = GoSubDag(goids, self.go2obj, relationships=False, prt=prt)
        subdag_r0.prt_goids(subdag_r0.go2obj, prt=prt)
        prt.write("{N} GO IDS".format(N=len(subdag_r0.go2obj)))
        plot_r0 = GoSubDagPlot(subdag_r0, mark_alt_id=True)
        png_r0 = "a_relationship_{NAME}_r0.png".format(NAME=name)
        plot_r0.plt_dag(os.path.join(REPO, png_r0))

        # goids.update(['GO:0007507'], ['GO:0072359'])
        # Pass 2: only the 'part_of' relationship
        prt.write("\nCreate GoSubDag while loading only the 'part_of' relationship")
        subdag_partof = GoSubDag(goids, self.go2obj, relationships=['part_of'], prt=prt)
        subdag_partof.prt_goids(subdag_partof.go2obj, prt=prt)
        prt.write("{N} GO IDS".format(N=len(subdag_partof.go2obj)))
        plot_partof = GoSubDagPlot(subdag_partof, mark_alt_id=True)
        prt.write("GO SOURCES:")
        subdag_partof.prt_goids(subdag_partof.go_sources, prt=prt)
        png_partof = "a_relationship_{NAME}_partof.png".format(NAME=name)
        plot_partof.plt_dag(os.path.join(REPO, png_partof))

        # Pass 3: relationships=True
        prt.write("\nCreate GoSubDag while loading all relationships")
        subdag_r1 = GoSubDag(goids, self.go2obj, relationships=True, prt=prt)
        prt.write("ALL {N} GO IDS:".format(N=len(subdag_r1.go2obj)))
        subdag_r1.prt_goids(subdag_r1.go2obj, prt=prt)
        prt.write("2 GO SOURCES:")
        subdag_r1.prt_goids(subdag_r1.go_sources, prt=prt)
        # GO IDs present here but not in the first sub-DAG get their own color
        goids_new = set(subdag_r1.go2obj) - set(subdag_r0.go2obj)
        go2color = dict.fromkeys(goids_new, '#d5ffff')
        prt.write("{N} NEW GO IDS:".format(N=len(goids_new)))
        subdag_r1.prt_goids(goids_new, prt=prt)
        prt.write("{N} GO IDS".format(N=len(subdag_r1.go2obj)))
        plot_r1 = GoSubDagPlot(subdag_r1, mark_alt_id=True, go2color=go2color)
        png_r1 = "a_relationship_{NAME}_r1.png".format(NAME=name)
        plot_r1.plt_dag(os.path.join(REPO, png_r1))
Пример #14
0
    def plot_all(self, goids, name, prt=sys.stdout):
        """Create three plots of ``goids`` that differ in the relationships loaded.

        The GoSubDag is built with relationships=False, then with
        relationships=['part_of'], then with relationships=True. After each
        build the GO IDs are written to ``prt`` and a PNG named after ``name``
        is saved under REPO. GO IDs that only appear in the last sub-DAG are
        colored '#d5ffff' in its plot.
        """
        go2obj = self.go2obj

        # First plot: relationships=False
        prt.write("\nCreate GoSubDag not loading any relationship")
        dag_norel = GoSubDag(goids, go2obj, relationships=False, prt=prt)
        dag_norel.prt_goids(dag_norel.go2obj, prt=prt)
        prt.write("{N} GO IDS".format(N=len(dag_norel.go2obj)))
        plt_norel = GoSubDagPlot(dag_norel, mark_alt_id=True)
        fout_norel = "a_relationship_{NAME}_r0.png".format(NAME=name)
        plt_norel.plt_dag(os.path.join(REPO, fout_norel))

        # goids.update(['GO:0007507'], ['GO:0072359'])
        # Second plot: only the 'part_of' relationship
        prt.write(
            "\nCreate GoSubDag while loading only the 'part_of' relationship")
        dag_partof = GoSubDag(goids, go2obj, relationships=['part_of'], prt=prt)
        dag_partof.prt_goids(dag_partof.go2obj, prt=prt)
        prt.write("{N} GO IDS".format(N=len(dag_partof.go2obj)))
        plt_partof = GoSubDagPlot(dag_partof, mark_alt_id=True)
        prt.write("GO SOURCES:")
        dag_partof.prt_goids(dag_partof.go_sources, prt=prt)
        fout_partof = "a_relationship_{NAME}_partof.png".format(NAME=name)
        plt_partof.plt_dag(os.path.join(REPO, fout_partof))

        # Third plot: relationships=True
        prt.write("\nCreate GoSubDag while loading all relationships")
        dag_allrel = GoSubDag(goids, go2obj, relationships=True, prt=prt)
        prt.write("ALL {N} GO IDS:".format(N=len(dag_allrel.go2obj)))
        dag_allrel.prt_goids(dag_allrel.go2obj, prt=prt)
        prt.write("2 GO SOURCES:")
        dag_allrel.prt_goids(dag_allrel.go_sources, prt=prt)
        # Color the GO IDs that were not in the relationship-free sub-DAG
        goids_new = set(dag_allrel.go2obj) - set(dag_norel.go2obj)
        go2color = dict.fromkeys(goids_new, '#d5ffff')
        prt.write("{N} NEW GO IDS:".format(N=len(goids_new)))
        dag_allrel.prt_goids(goids_new, prt=prt)
        prt.write("{N} GO IDS".format(N=len(dag_allrel.go2obj)))
        plt_allrel = GoSubDagPlot(dag_allrel, mark_alt_id=True, go2color=go2color)
        fout_allrel = "a_relationship_{NAME}_r1.png".format(NAME=name)
        plt_allrel.plt_dag(os.path.join(REPO, fout_allrel))