def test_get_children(prt=sys.stdout):
    """Check that get_id2children agrees with GOTerm.get_all_children().

    Loads go-basic.obo from the directory above this file, gathers the
    children of every GO term with both implementations, prints the elapsed
    time of each pass to ``prt`` and compares the two results.

    NOTE(review): this test was previously described as a semantic-similarity
    test for Issue #86; the code only compares children sets -- confirm the
    issue reference.

    Args:
        prt: Stream handed to prt_hms for the timing messages.
    """
    # Load the GO-DAG, keeping only entries whose key is the term's own ID
    repo_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    dag = get_godag(os.path.join(repo_dir, "go-basic.obo"))
    go2obj = {goid: term for goid, term in dag.items() if goid == term.id}

    # Reference pass: one GOTerm.get_all_children() call per term.
    # Terms with an empty result are tallied separately and left out of
    # the expected mapping.
    start = timeit.default_timer()
    expected = {}
    childless = set()
    for term in go2obj.values():
        found = term.get_all_children()
        if not found:
            childless.add(term.id)
            continue
        expected[term.id] = found
    start = prt_hms(start, "Get all goobj's children using GOTerm.get_all_children()", prt)

    # Pass under test: get_id2children over all terms in one call
    actual = get_id2children(go2obj.values())
    prt_hms(start, "Get all goobj's children using go_tasks::get_id2children", prt)

    # Compare the children sets of both passes
    checker = CheckGOs('test_get_children', go2obj)
    checker.chk_a2bset(expected, actual)
def test_get_upperselect(prt=sys.stdout):
    """Test getting parents and user-specified ancestor relationships.

    Loads go-basic.obo (with the optional 'relationship' attribute) from the
    directory above this file.  For every relationship combination returned
    by RelationshipCombos.get_relationship_combos(), the per-term results of
    get_all_upperselect are compared with the results of a single
    get_id2upperselect call over all terms.

    Args:
        prt: Stream handed to prt_hms for the timing messages.
    """
    # Load GO-DAG
    here = os.path.dirname(os.path.abspath(__file__))
    fin_obo = os.path.join(here, "..", 'go-basic.obo')
    godag = get_godag(fin_obo, optional_attrs='relationship')

    run = RelationshipCombos(godag)
    run.chk_relationships_all()
    combos = run.get_relationship_combos()
    print('{N} COMBINATIONS OF RELATIONSHIPS'.format(N=len(combos)))

    for num, rels in enumerate(combos, 1):
        print('{I}) RELATIONSHIPS[{N}]: {Rs}'.format(
            I=num, N=len(rels), Rs=' '.join(sorted(rels))))

        # Reference pass: one get_all_upperselect call per GO term
        start = timeit.default_timer()
        expected = {}
        for term in run.go2obj.values():
            expected[term.item_id] = get_all_upperselect(term, rels)
        start = prt_hms(start, "Get all goobj's parents using get_all_upperselect(GOTerm)", prt)

        # Pass under test: get_id2upperselect over all terms in one call
        actual = get_id2upperselect(run.go2obj.values(), rels)
        prt_hms(start, "Get all goobj's parents using go_tasks::get_id2upperselect", prt)

        # Compare both results; argument order is EXPECTED, ACTUAL
        checker = CheckGOs('test_get_upper_select', godag)
        checker.chk_a2bset(expected, actual)
        print("PASSED: get_upperselect RELATIONSHIPS[{N}]: {Rs}".format(
            N=len(rels), Rs=' '.join(sorted(rels))))
def test_david_chart():
    """Read six DAVID chart files and check their record and per-field counts.

    Each file under data/gjoneska_pfenning is read with DavidChartReader.
    The number of records (``obj.nts``) must equal the expected TOTAL, and
    every (field, count) pair from ``get_num_sig().most_common()`` must equal
    the expected value stored under the same field name.
    """
    repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    david_dir = "{REPO}/data/gjoneska_pfenning".format(REPO=repo)

    # Expected values per file: TOTAL records, then one count per field
    ntobj = cx.namedtuple("david6p8", "TOTAL FDR Bonferroni Benjamini PValue")
    fin2exp = {
        "david_chart6p8_Consistent_Decrease.txt": ntobj(1773, 259, 249, 432, 1316),
        "david_chart6p8_Transient_Decrease.txt": ntobj(423, 0, 2, 2, 246),
        "david_chart6p8_Consistent_Increase.txt": ntobj(2359, 353, 308, 781, 1868),
        "david_chart6p8_Transient_Increase.txt": ntobj(2191, 658, 652, 1105, 1786),
        "david_chart6p8_Late_Decrease.txt": ntobj(2752, 591, 568, 1153, 2187),
        "david_chart6p8_Late_Increase.txt": ntobj(4597, 708, 616, 1715, 3603),
    }

    # Read every file first so that the timing covers reader creation only
    start = timeit.default_timer()
    fin2obj = {}
    for fname in fin2exp:
        fin2obj[fname] = DavidChartReader(os.path.join(david_dir, fname))
    prt_hms(start, "Created DavidChartReader objects")

    for fin, reader in fin2obj.items():
        expected = fin2exp[fin]
        assert expected.TOTAL == len(reader.nts)
        reader.prt_num_sig()
        for fld, cnt_actual in reader.get_num_sig().most_common():
            cnt_expected = getattr(expected, fld)
            assert cnt_actual == cnt_expected, "{FIN}: {FLD} Act({ACT}) Exp({EXP})".format(
                FIN=fin, FLD=fld, ACT=cnt_actual, EXP=cnt_expected)
def chk_get_goterms_upper(self):
    """Check GOTerm.get_goterms_upper() against self._get_goterms_upper().

    For every GO term in ``self.go2obj``, the item IDs of the terms returned
    by ``get_goterms_upper()`` must equal the ID set returned by
    ``self._get_goterms_upper(item_id)``.  The elapsed time is reported
    through prt_hms once every term has passed.

    Raises:
        AssertionError: On the first term whose two ID sets differ.  The
            message names the GO ID and shows the expected and actual sets.
    """
    tic = timeit.default_timer()
    for goterm in self.go2obj.values():
        goids_act = {o.item_id for o in goterm.get_goterms_upper()}
        goids_exp = self._get_goterms_upper(goterm.item_id)
        # Name the offending GO ID and show both sets so that a failure
        # can be diagnosed without re-running under a debugger.
        assert goids_act == goids_exp, "{GO} EXP({E}) ACT({A})".format(
            GO=goterm.item_id, E=goids_exp, A=goids_act)
    prt_hms(tic, "get_goterms_upper")
def chk_get_goterms_lower(self):
    """Check GOTerm.get_goterms_lower() against self._get_goterms_lower().

    For every GO term in ``self.go2obj``, the item IDs of the terms returned
    by ``get_goterms_lower()`` must equal the ID set returned by
    ``self._get_goterms_lower(item_id)``.  The elapsed time is reported
    through prt_hms once every term has passed.

    Raises:
        AssertionError: On the first mismatch, naming the GO ID together
            with the expected and actual ID sets.
    """
    start = timeit.default_timer()
    for term in self.go2obj.values():
        actual = {lower.item_id for lower in term.get_goterms_lower()}
        expected = self._get_goterms_lower(term.item_id)
        failure_msg_fmt = "{GO} EXP({E}) ACT({A})"
        assert actual == expected, failure_msg_fmt.format(
            GO=term.item_id, E=expected, A=actual)
    prt_hms(start, "get_goterms_lower")
def get_gosubdag_r0(self, goids):
    """Return a GoSubDag built from ``goids`` on ``self.godag_r0``.

    The GoSubDag is created with ``relationships=None`` and ``prt=None``;
    the construction time and the sizes of its ``go2obj`` and ``go_sources``
    are reported through prt_hms.

    Args:
        goids: GO IDs passed to GoSubDag as its sources.

    Returns:
        The newly created GoSubDag.
    """
    start = timeit.default_timer()
    # NOTE(review): passing rcntobj=self.gosubdag_r0.rcntobj was disabled
    # in this call; it is intentionally left out here as well.
    subdag = GoSubDag(goids, self.godag_r0, relationships=None, prt=None)
    summary = "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
        N=len(subdag.go2obj), S=len(subdag.go_sources))
    prt_hms(start, summary)
    return subdag
def _randoms(self, prt):
    """Compare GOATOOLS and pygosemsim Wang similarity on consecutive GO ID pairs.

    ``self.goids`` is read as a flat sequence of pairs: items 0 and 1 form
    the first pair, items 2 and 3 the second, and so on.  A trailing
    unpaired ID is ignored.  Each pair is scored with ``self.wang.get_sim``
    (GOATOOLS) and with ``similarity.wang`` on ``self.graph`` (pygosemsim).
    Pairs whose two values differ by more than 0.02 are reported to both
    ``prt`` and stdout, followed by ``self.prt_ancestors`` for both terms;
    every other pair gets a one-line PASS record written to ``prt``.

    Args:
        prt: Writable stream providing ``write`` and ``flush``.

    Raises:
        AssertionError: If either library returns None for a pair, or if
            the two result lists differ in length.
    """
    # pylint: disable=line-too-long
    goids = self.goids
    # zip() stops at the shorter slice, so an odd number of IDs no longer
    # raises IndexError on the last, unpaired element.
    go_pairs = list(zip(goids[::2], goids[1::2]))
    tic = timeit.default_timer()
    # Information on Python's round, which is used in 2 spots in pygosemsim:
    #   https://stackoverflow.com/questions/13479163/round-float-to-x-decimals
    #   from decimal import Decimal
    #   >>> Decimal('66.66666666666').quantize(Decimal('1e-4'))
    #   Decimal('66.6667')
    #   >>> Decimal('1.29578293').quantize(Decimal('1e-6'))
    #   Decimal('1.295783')
    # In issue https://github.com/micropython/micropython/issues/3516,
    # https://github.com/mdickinson dreams of deprecating the two-argument
    # form of round in Python:
    #   https://github.com/micropython/micropython/issues/3516#issuecomment-625298591
    # Use the decimal type instead: https://docs.python.org/3.10/library/decimal.html
    acts = [self.wang.get_sim(a, b) for a, b in go_pairs]
    tic = prt_hms(tic, 'GOATOOLS wang calc')
    exps = [similarity.wang(self.graph, a, b) for a, b in go_pairs]
    tic = prt_hms(tic, 'pysemsim wang')
    assert len(acts) == len(exps)
    # NOTE(review): failures is tallied but never returned or asserted on
    # in this function -- confirm whether callers should receive it.
    failures = 0
    for idx, (act, exp, (go_a, go_b)) in enumerate(zip(acts, exps, go_pairs)):
        assert act is not None, self._prt_ab(idx, go_a, go_b, act, exp, stdout)
        assert exp is not None, self._prt_ab(idx, go_a, go_b, act, exp, stdout)
        if abs(exp - act) > 0.02:
            # Mismatch: report to the log stream and to stdout
            for strm in (prt, stdout):
                self._prt_ab(idx, go_a, go_b, act, exp, strm)
            stdout.flush()
            prt.flush()
            failures += 1
            self.prt_ancestors(go_a, True)
            self.prt_ancestors(go_b, True)
        else:
            prt.write('{i} PASS {A} {B} pygosemsim={b:f} GOATOOLS={a:f}\n'.format(
                i=idx, A=go_a, B=go_b, a=act, b=exp))
def __init__(self, fin_godag, num_calcs, relationships, w_e, seed, prt):
    """Load the GO DAG with both libraries and set up the Wang calculator.

    Args:
        fin_godag: Path of the GO DAG file, passed to get_godag; the same
            path without its extension is passed to graph.from_resource.
        num_calcs: Passed to ``self._init_goids`` to build ``self.goids``.
        relationships: Passed to SsWang and get_go2reldepth.
        w_e: Passed to SsWang.
        seed: Passed to RandomSeed32.
        prt: Stream passed to get_godag.
    """
    start = timeit.default_timer()
    self.godag = get_godag(fin_godag, optional_attrs=['relationship'], prt=prt)
    start = prt_hms(start, 'GOATOOLS read godag')

    # pygosemsim is handed the path without its extension; this is needed
    # because pygosemsim does not understand Cygwin paths.
    resource = splitext(fin_godag)[0]
    self.graph = graph.from_resource(resource)
    prt_hms(start, 'pygosemsim read godag')

    self.seedobj = RandomSeed32(seed)
    self.goids = self._init_goids(num_calcs)

    # The Wang setup is timed on its own, starting from a fresh timer
    start = timeit.default_timer()
    self.wang = SsWang(self.goids, self.godag, relationships, w_e)
    all_terms = {self.godag[goid] for goid in self.godag}
    self.go2reldepth = get_go2reldepth(all_terms, relationships)
    prt_hms(start, 'GOATOOLS wang setup')
def test_i154_semsim_lin():
    """Load the GO DAG with optional attributes and check obsolete-term fields.

    The DAG is loaded twice with ``load_obsolete=True``: once with the
    optional attributes ``{'consider', 'replaced_by'}`` and once with the
    single attribute name ``'consider'``.  After each load the expected
    fields of GO:0000067 (and GO:0003734 for the first load) are asserted.

    NOTE(review): this test was previously described as "issue 148, Lin
    Similarity if a term has no annotations", while the function name says
    i154 and the code only exercises optional attributes -- confirm the
    issue reference.
    """
    fin_dag = download_go_basic_obo()
    prt = sys.stdout
    load_obsolete = True

    # First load: a set of optional attributes
    tic = timeit.default_timer()
    godag = GODag(fin_dag, {'consider', 'replaced_by'}, load_obsolete, prt)
    # Keep the value returned by prt_hms as the next start time (the same
    # pattern is used by other tests in this file) so that the second load
    # is timed on its own instead of including the first load.
    tic = prt_hms(tic, 'Loaded GO DAG')
    assert godag['GO:0000067'].consider
    assert godag['GO:0003734'].replaced_by == 'GO:0030532'

    # Second load: a single optional attribute given as a plain string
    godag = GODag(fin_dag, 'consider', load_obsolete, prt)
    prt_hms(tic, 'Loaded GO DAG')
    assert godag['GO:0000067'].consider
def test_update_association():
    """Check that three ways of propagating counts give matching associations.

    Reads go-basic.obo and goa_human.gaf from REPO, makes two copies of the
    downloaded associations, updates one with ``godag.update_association``
    and the other with the stand-alone ``update_association``, and obtains a
    third mapping from ``objanno.get_id2gos(namespace='BP',
    propagate_counts=True)``.  The results are compared with _chk_assc and
    the DAG is checked with _chk_godag.
    """
    def _copy_assc(assc):
        """Return a new dict with a fresh set of GO IDs for every key."""
        return {gene: set(gos) for gene, gos in assc.items()}

    print('\n1) READ GODAG:')
    # Other annotation files noted for this test:
    # gene_association.fb, gene_association.mgi
    fin_assc = join(REPO, "goa_human.gaf")
    obo = join(REPO, "go-basic.obo")
    start = timeit.default_timer()
    godag = get_godag(obo)
    start = prt_hms(start, "Created two GODags: One for original and one for new propagate counts")

    print('\n2) READ ANNOTATIONS:')
    assc_orig = dnld_assc(fin_assc, godag, prt=stdout)
    start = prt_hms(start, "Associations Read")
    objanno = get_objanno(fin_assc, 'gaf', godag=godag)
    start = prt_hms(start, "Associations Read")

    print('\n3) MAKE COPIES OF ASSOCIATIONS:')
    assc1 = _copy_assc(assc_orig)
    assc2 = _copy_assc(assc_orig)
    start = prt_hms(start, "Associations Copied: One for original and one for new")

    print('\n4) UPDATE ASSOCIATIONS (PROPAGATE COUNTS):')
    godag.update_association(assc1)
    start = prt_hms(start, "ORIG: godag.update_association(assc)")
    update_association(assc2, godag)
    start = prt_hms(start, "NEW SA:    update_association(go2obj, assc_orig)")
    assc3 = objanno.get_id2gos(namespace='BP', propagate_counts=True)
    prt_hms(start, "NEW BASE:  update_association(go2obj, assc_orig)")

    print('\n5) RUN CHECKS')
    _chk_assc(assc1, assc2)
    _chk_assc(assc1, assc3)
    _chk_godag(godag, obo)
def __init__(self):
    """Load two GO DAGs from ``self.obo`` and build a GoSubDag for each.

    ``self.obo`` is not set here and must already be available on the
    instance or class.  ``godag_r0`` is loaded without optional attributes,
    ``godag_r1`` with the optional 'relationship' attribute.  Both
    GoSubDags use every distinct term ID of ``godag_r0`` as sources, and
    the construction time of each is reported through prt_hms.
    """
    download_go_basic_obo(self.obo, sys.stdout, loading_bar=None)
    self.godag_r0 = GODag(self.obo)
    self.godag_r1 = GODag(self.obo, optional_attrs={'relationship'})
    # De-duplicate: distinct keys of the DAG may map to the same term ID
    self.goids = list({o.id for o in self.godag_r0.values()})

    # GoSubDag (plain)
    tic = timeit.default_timer()
    self.gosubdag_r0 = GoSubDag(self.goids, self.godag_r0, prt=None)
    # Keep the value returned by prt_hms as the next start time so the r1
    # timing below covers only the r1 construction, not r0 as well.
    tic = prt_hms(
        tic, "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
            N=len(self.gosubdag_r0.go2obj),
            S=len(self.gosubdag_r0.go_sources)))

    # GoSubDag with relationships
    self.gosubdag_r1 = GoSubDag(self.goids, self.godag_r1, prt=None, relationships=True)
    prt_hms(
        tic, "GoSubDag r1 {N:4} GOs {S:3} srcs".format(
            N=len(self.gosubdag_r1.go2obj),
            S=len(self.gosubdag_r1.go_sources)))
def test_get_parent(prt=sys.stdout):
    """Check that get_id2parents and get_id2parents2 agree with GOTerm.get_all_parents().

    Loads go-basic.obo from the directory above this file, gathers the
    parents of every GO term with all three implementations, prints the
    elapsed time of each pass to ``prt`` and compares the two bulk results
    with the per-term reference.

    NOTE(review): this test was previously described as a semantic-similarity
    test for Issue #86; the code only compares parent sets -- confirm the
    issue reference.

    Args:
        prt: Stream handed to prt_hms for the timing messages.
    """
    # Load the GO-DAG, keeping only entries whose key is the term's own ID
    repo_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
    dag = get_godag(os.path.join(repo_dir, "go-basic.obo"))
    go2obj = {goid: term for goid, term in dag.items() if goid == term.id}

    # Reference pass: one GOTerm.get_all_parents() call per term.
    # Every term is recorded, whatever get_all_parents() returns for it;
    # an earlier filter on empty results is no longer applied.
    start = timeit.default_timer()
    expected = {term.id: term.get_all_parents() for term in go2obj.values()}
    start = prt_hms(start, "Get all goobj's parents using GOTerm.get_all_parents()", prt)

    # First pass under test: get_id2parents
    actual_fast = get_id2parents(go2obj.values())
    start = prt_hms(start, "Get all goobj's parents using go_tasks::get_id2parents", prt)

    # Second pass under test: get_id2parents2
    actual_fast2 = get_id2parents2(go2obj.values())
    prt_hms(start, "Get all goobj's parents using go_tasks::get_id2parents2", prt)

    # Compare both bulk results with the reference
    checker = CheckGOs('test_get_parents', go2obj)
    checker.chk_a2bset_equiv(expected, actual_fast)
    checker.chk_a2bset_equiv(expected, actual_fast2)
    print("PASSED: get_parent test")