def testCache(self, minMatchPrimaryPercent=None, logSizes=False):
    """Report whether the reference caches are populated and adequately matched.

    Args:
        minMatchPrimaryPercent (float, optional): when truthy, the percentage of
            match records whose "matched" value is "primary" (relative to the
            number of entries in the reference Id map) must exceed this value.
            Defaults to None (no percentage test).
        logSizes (bool, optional): flag to log the sizes of the cached resources. Defaults to False.

    Returns:
        bool: True when the Id map, match and reference dictionaries are all non-empty
              and the optional percentage test passes, False otherwise
    """
    idMapD = self.__refIdMapD
    matchD = self.__matchD
    refD = self.__refD
    numIdMap, numMatch, numRefD = len(idMapD), len(matchD), len(refD)
    logger.info("Reference cache lengths: refIdMap %d matchD %d refD %d", numIdMap, numMatch, numRefD)
    cacheOk = all((idMapD, matchD, refD))
    #
    # Tally the match records by their "matched" category
    statusCountD = defaultdict(int)
    logger.info("Match dictionary length %d", numMatch)
    for matchInfoD in matchD.values():
        if "matched" in matchInfoD:
            statusCountD[matchInfoD["matched"]] += 1
    logger.info("Reference length %d match length %d coverage %r", numRefD, numMatch, statusCountD.items())
    #
    percentOk = True
    if minMatchPrimaryPercent:
        try:
            primaryPercent = 100.0 * float(statusCountD["primary"]) / float(numIdMap)
            percentOk = primaryPercent > minMatchPrimaryPercent
        except Exception:
            # e.g. an empty Id map (division by zero) counts as a failed test
            percentOk = False
        logger.info("Primary reference match percent test status %r", percentOk)
    #
    if logSizes:
        resourceT = (
            self.__pfP,
            self.__ipP,
            self.__ssP,
            self.__goP,
            self.__ecP,
            idMapD,
            matchD,
            refD,
        )
        sizeMbL = [getObjSize(resource) / 1000000.0 for resource in resourceT]
        logger.info(
            "Pfam %.2f InterPro %.2f SIFTS %.2f GO %.2f EC %.2f RefIdMap %.2f RefMatchD %.2f RefD %.2f",
            *sizeMbL,
        )
    return cacheOk and percentOk
def testCache(self, minCount=None, logSizes=False): if logSizes and self.__ccIdxD: logger.info("ccIdxD (%.2f MB)", getObjSize(self.__ccIdxD) / 1000000.0) ok = self.__ccIdxD and len( self.__ccIdxD ) >= minCount if minCount else self.__ccIdxD is not None return ok
def testCache(self, minMatchPrimaryPercent=None, logSizes=False, minMissing=0):
    """Test the state of the reference sequence data cache.

    The cache passes when the match and reference dictionaries are both non-empty
    and the count of missing matched reference Ids does not exceed `minMissing`.
    When that test fails, match coverage details are logged as diagnostics and
    the method returns False.

    Args:
        minMatchPrimaryPercent (float, optional): minimal acceptable percentage of match
            records whose "matched" value is "primary". Only evaluated and logged on the
            failure path. Defaults to None.
        logSizes (bool, optional): flag to log resource sizes (failure path only). Defaults to False.
        minMissing (int, optional): largest acceptable number of missing matched reference Ids.
            Defaults to 0.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        ok = bool(self.__matchD and self.__refD and self.__missingMatchIds <= minMissing)
        logger.info("Reference cache lengths: matchD %d refD %d missing matches %d", len(self.__matchD), len(self.__refD), self.__missingMatchIds)
        if ok:
            return ok
    except Exception as e:
        logger.error("Failing with unexpected cache state %s", str(e))
        return False
    #
    # -- The primary test failed; the remaining steps only log diagnostic details --
    #
    # Bind the percentage status up front so the final expression never depends
    # on short-circuit evaluation to avoid an unbound local.
    okC = True
    numRef = len(self.__matchD)
    countD = defaultdict(int)
    logger.info("Match dictionary length %d", len(self.__matchD))
    for mD in self.__matchD.values():
        if "matched" in mD:
            countD[mD["matched"]] += 1
    logger.info("Reference length %d match length %d coverage %r", len(self.__refD), len(self.__matchD), countD.items())
    if minMatchPrimaryPercent:
        try:
            okC = 100.0 * float(countD["primary"]) / float(numRef) > minMatchPrimaryPercent
        except Exception:
            # Best effort: e.g. an empty match dictionary (division by zero) is a failed test
            okC = False
        logger.info("Primary reference match count test status %r", okC)
    #
    if logSizes:
        logger.info(
            "RefMatchD %.2f RefD %.2f",
            getObjSize(self.__matchD) / 1000000.0,
            getObjSize(self.__refD) / 1000000.0,
        )
    # ok is False whenever this point is reached, so the result is False
    return ok and okC
def testCache(self, minCount=None, logSizes=False): okC = bool(self.__refD) if not okC: return okC logger.info("Reference data cache lengths: refD %d", len(self.__refD)) if minCount and len(self.__refD) < minCount: return False # if logSizes: logger.info("refD %.2f", getObjSize(self.__refD) / 1000000.0) return True
def testCache(self, minMatch=None, logSizes=False): self.getMatchData() okC = bool(self.__matchD) if not okC: return okC logger.info("Reference data cache lengths: matchD %d", len(self.__matchD)) if minMatch and len(self.__matchD) < minMatch: return False # if logSizes: logger.info("PubChem MatchD %.2f", getObjSize(self.__matchD) / 1000000.0) return True
def testCache(self, minCount=None, logSizes=False): if logSizes and self.__ccMolD: logger.info("ccMolD object size %.2f MB", getObjSize(self.__ccMolD) / 1000000.0) ok = self.__ccMolD and len(self.__ccMolD) >= minCount if minCount else self.__ccMolD is not None return ok
def testCache(self, minCount=None, logSizes=False): if logSizes and self.__searchIdx: logger.info("searchIdxD (%.2f MB)", getObjSize(self.__searchIdx) / 1000000.0) ok = self.__searchIdx and len(self.__searchIdx) >= minCount if minCount else self.__searchIdx is not None return ok