Example #1
 def testProviderReadValidationReport(self):
     mU = MarshalUtil()
     vpr = ValidationReportAdapter(dirPath=os.path.join(
         self.__workPath, "vprt"),
                                   useCache=False,
                                   clearCache=True)
     vrd = vpr.getReader()
     cL = mU.doImport(self.__exampleFileXray,
                      fmt="xml",
                      marshalHelper=vrd.toCif)
     ok = mU.doExport(self.__cifFileXray, cL, fmt="mmcif")
     self.assertTrue(ok)
     #
     vpr = ValidationReportAdapter(dirPath=os.path.join(
         self.__workPath, "vprt"),
                                   useCache=True,
                                   clearCache=False)
     vrd = vpr.getReader()
     xrt = mU.doImport(self.__exampleFileNmr, fmt="xml")
     cL = vrd.toCif(xrt)
     ok = mU.doExport(self.__cifFileNmr, cL, fmt="mmcif")
     self.assertTrue(ok)
     #
     vpr = ValidationReportAdapter(dirPath=os.path.join(
         self.__workPath, "vprt"),
                                   useCache=True,
                                   clearCache=False)
     vrd = vpr.getReader()
     xrt = mU.doImport(self.__exampleFileEm, fmt="xml")
     cL = vrd.toCif(xrt)
     ok = mU.doExport(self.__cifFileEm, cL, fmt="mmcif")
     self.assertTrue(ok)
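
The adapter workflow above relies on MarshalUtil's format-dispatch API: doImport()/doExport() select a reader or writer from the fmt keyword, and an optional marshalHelper callable transforms the parsed object in flight. A minimal round-trip sketch, assuming the rcsb.utils.io package is installed; the paths are hypothetical placeholders.

 from rcsb.utils.io.MarshalUtil import MarshalUtil

 mU = MarshalUtil(workPath="./work")
 # fmt="xml" returns a parsed document object that a helper could transform
 xrt = mU.doImport("./data/example_validation.xml", fmt="xml")
 # doExport() serializes the given object in the requested format
 ok = mU.doExport("./work/flag.json", {"parsed": xrt is not None}, fmt="json")
 print("export ok:", ok)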
Example #2
 def __reload(self, dirPath, baseVersion, useCache, **kwargs):
     startTime = time.time()
     mU = MarshalUtil(workPath=dirPath)
     chemblDbUrl = kwargs.get(
         "ChEMBLDbUrl",
         "ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/")
     ok = False
     fU = FileUtil()
     fU.mkdir(dirPath)
     #
     # ChEMBL current version <baseVersion>,...
     # template:  chembl_<baseVersion>.fa.gz
     #
     targetFileName = "chembl_" + str(baseVersion) + ".fa.gz"
     mappingFileName = "chembl_uniprot_mapping.txt"
     #
     chemblTargetPath = os.path.join(dirPath, targetFileName)
     chemblMappingPath = os.path.join(dirPath, mappingFileName)
     mappingFilePath = os.path.join(dirPath, "chembl_uniprot_mapping.json")
     #
     mapD = {}
     if useCache and fU.exists(mappingFilePath):
         logger.info("useCache %r using %r and %r and %r", useCache,
                     chemblTargetPath, chemblMappingPath, mappingFilePath)
         mapD = mU.doImport(mappingFilePath, fmt="json")
     else:
         # Get the ChEMBL UniProt mapping file
         url = os.path.join(chemblDbUrl, mappingFileName)
         ok = fU.get(url, chemblMappingPath)
         logger.info("Fetched %r url %s path %s", ok, url,
                     chemblMappingPath)
         logger.info("Reading ChEMBL mapping file path %s", mappingFilePath)
         rowL = mU.doImport(chemblMappingPath, fmt="tdd", rowFormat="list")
         for row in rowL:
             mapD[row[0]] = (row[1], row[2], row[3])
         ok = mU.doExport(mappingFilePath, mapD, fmt="json")
         logger.info("Processed mapping path %s (%d) %r", mappingFilePath,
                     len(mapD), ok)
         #
         # Get the target FASTA files --
         for vers in range(baseVersion, baseVersion + 10):
             logger.info("Now fetching version %r", vers)
             self.__version = vers
             targetFileName = "chembl_" + str(vers) + ".fa.gz"
             chemblTargetPath = os.path.join(dirPath,
                                             "chembl_targets_raw.fa.gz")
             url = os.path.join(chemblDbUrl, targetFileName)
             ok = fU.get(url, chemblTargetPath)
             logger.info("Fetched %r url %s path %s", ok, url,
                         chemblTargetPath)
             if ok:
                 break
     #
     logger.info("Completed reload at %s (%.4f seconds)",
                 time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                 time.time() - startTime)
     #
     return mapD
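
The FASTA fetch above probes successive release numbers until one download succeeds. A generic stdlib sketch of that idiom, with urllib.request standing in for FileUtil.get() and the chembl_<vers>.fa.gz template taken from the code above; the base URL argument is a placeholder.

 import urllib.request

 def fetchLatestRelease(baseUrl, baseVersion, destPath, probes=10):
     # Try chembl_<vers>.fa.gz for up to `probes` successive versions
     for vers in range(baseVersion, baseVersion + probes):
         url = "%s/chembl_%d.fa.gz" % (baseUrl, vers)
         try:
             urllib.request.urlretrieve(url, destPath)
             return vers  # first version that exists wins
         except OSError:
             continue
     return None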
Example #3
 def testReadUrlTarfile(self):
     """Test the case to read URL target and extract a member"""
     try:
         mU = MarshalUtil(workPath=self.__workPath)
         _, fn = os.path.split(self.__urlTarget)
         #
         nmL = mU.doImport(self.__urlTarget, fmt="tdd", rowFormat="list", tarMember="names.dmp")
         self.assertGreater(len(nmL), 2000000)
         logger.info("Names %d", len(nmL))
         ndL = mU.doImport(os.path.join(self.__workPath, fn), fmt="tdd", rowFormat="list", tarMember="nodes.dmp")
         self.assertGreater(len(ndL), 2000000)
         logger.info("Nodes %d", len(ndL))
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
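
In the passing test above, doImport() fetches the remote archive, caches it under workPath (the second import reads the cached copy by file name), and extracts the named tar member before parsing. A hedged sketch of the same call shape, assuming rcsb.utils.io is installed; the URL is a placeholder.

 from rcsb.utils.io.MarshalUtil import MarshalUtil

 mU = MarshalUtil(workPath="./work")
 # Fetch the archive and parse the tab-delimited member into row lists
 rowL = mU.doImport("https://example.org/taxdump.tar.gz", fmt="tdd",
                    rowFormat="list", tarMember="names.dmp")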
Example #4
 def __rebuildCache(self, **kwargs):
     mU = MarshalUtil()
     # source directory path
     srcDirPath = kwargs.get("srcDirPath", None)
     # cache details
     cacheKwargs = kwargs.get("cacheKwargs", {"fmt": "pickle"})
     useCache = kwargs.get("useCache", True)
     entrySaveLimit = kwargs.get("entrySaveLimit", None)
     abbreviated = str(kwargs.get("abbreviated", "TEST")).upper()
     #
     # cacheDirPath = kwargs.get("cacheDirPath", None)
     cacheDirPath = self.__cacheDirPath
     pyVersion = sys.version_info[0]
     ext = "pic" if cacheKwargs["fmt"] == "pickle" else "json"
     saveFilePath = os.path.join(cacheDirPath, "sifts-summary-py%s.%s" % (str(pyVersion), ext))
     #
     ssD = {}
     try:
         if useCache and os.access(saveFilePath, os.R_OK):
             ssD = mU.doImport(saveFilePath, **cacheKwargs)
         else:
             if not srcDirPath:
                 logger.error("Missing SIFTS source path details")
                 return ssD
             ssD = self.__getSummaryMapping(srcDirPath, abbreviated=abbreviated)
             if entrySaveLimit:
                 ssD = {k: ssD[k] for k in list(ssD.keys())[:entrySaveLimit]}
             mU.mkdir(cacheDirPath)
             ok = mU.doExport(saveFilePath, ssD, **cacheKwargs)
             logger.debug("Saving SIFTS summary serialized data file %s (%d) status %r", saveFilePath, len(ssD), ok)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return ssD
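
Several providers in this collection share the cache-or-rebuild shape seen here: import a serialized summary when useCache is set and the file is readable, otherwise compute, export, and return it. A compact stdlib sketch, with json standing in for MarshalUtil's pickle/json dispatch.

 import json
 import os

 def loadOrBuild(savePath, builder, useCache=True):
     # Reuse the serialized cache when permitted and readable
     if useCache and os.access(savePath, os.R_OK):
         with open(savePath) as ifh:
             return json.load(ifh)
     data = builder()
     os.makedirs(os.path.dirname(savePath) or ".", exist_ok=True)
     with open(savePath, "w") as ofh:
         json.dump(data, ofh)
     return data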
    def reloadDump(self, fmt="json"):
        """Reload PubChem reference data store from saved dump.

        Args:
            fmt (str, optional): format of the backup file (pickle or json). Defaults to "json".

        Returns:
            (int): number of objects restored.
        """
        numUpd = 0
        try:
            # Read from disk backup and update object store -
            if fmt in ["json", "pickle"]:
                fp = self.__getdumpFilePath(fmt="json")
                logger.info("Restoring object store from %s", fp)
                mU = MarshalUtil(workPath=self.__dirPath)
                matchD = mU.doImport(fp, fmt=fmt)
                numUpd = self.__reloadDump(
                    matchD,
                    self.__databaseName,
                    self.__matchIndexCollectionName,
                    indexAttributeNames=["rcsb_id", "rcsb_last_update"])
        except Exception as e:
            logger.exception("Failing for %r with %s", self.__dirPath, str(e))
        # --
        return numUpd
Example #6
    def getJsonSchema(self, databaseName, collectionName, encodingType="BSON", level="full", extraOpts=None):
        """Return JSON schema (w/ BSON types) object for the input collection and level.and

        Args:
            databaseName (str): database name
            collectionName (str): collection name in document store
            encodingType (str, optional): data type convention (BSON|JSON)
            level (str, optional): Completeness of the schema (e.g. min or full)

        Returns:
            dict: Schema object

        """
        sObj = None
        schemaLocator = self.__getJsonSchemaLocator(databaseName, collectionName, encodingType=encodingType, level=level)
        #
        if self.__rebuildFlag:
            filePath = os.path.join(self.__schemaCachePath, self.__fileU.getFileName(schemaLocator))
            self.makeSchema(databaseName, collectionName, encodingType=encodingType, level=level, extraOpts=extraOpts)
        else:
            filePath = self.__reload(schemaLocator, self.__jsonSchemaCachePath, useCache=self.__useCache)
        mU = MarshalUtil(workPath=self.__workPath)
        if filePath and mU.exists(filePath):
            sObj = mU.doImport(filePath, fmt="json")
        else:
            logger.debug("Failed to read schema for %s %r", collectionName, level)
        return sObj
Example #7
    def updateDefaultDataTypeMap(self, filePath, mapD, dataTyping="ANY"):
        """Update data file containing application default data type mapping with any
         updates from the input type mapping dictionary

        mapD['cif_type_code'] -> ['application_name', 'app_type_code', 'app_precision_default', 'app_width_default', 'type_code']

               data_rcsb_data_type_map
                 loop_
                 _pdbx_data_type_application_map.application_name
                 _pdbx_data_type_application_map.type_code
                 _pdbx_data_type_application_map.app_type_code
                 _pdbx_data_type_application_map.app_precision_default
                 _pdbx_data_type_application_map.app_width_default
                 # .... type mapping data ...
        """
        try:
            #
            mD = copy.deepcopy(mapD)
            mU = MarshalUtil(workPath=self.__workPath)
            containerList = mU.doImport(filePath,
                                        fmt="mmcif",
                                        enforceAscii=True,
                                        useCharRefs=True,
                                        raiseExceptions=True)
            for container in containerList:
                if container.getName() == "rcsb_data_type_map":
                    catObj = container.getObj("pdbx_data_type_application_map")
                    rIL = []
                    for ii in range(catObj.getRowCount()):
                        dD = catObj.getRowAttributeDict(ii)
                        if dD["application_name"] == dataTyping:
                            rIL.append(ii)
                            mD[dD["type_code"]] = {
                                k: dD[k]
                                for k in [
                                    "application_name", "app_type_code",
                                    "app_precision_default",
                                    "app_width_default", "type_code"
                                ]
                            }
                    ok = catObj.removeRows(rIL)
                    atNameL = catObj.getAttributeList()
                    # Append rows from the merged map (file rows overlaid on the input updates)
                    for ky in mD:
                        row = [mD[ky][atN] for atN in atNameL]
                        catObj.append(row)
            #
            # Write updated data file
            mU = MarshalUtil(workPath=self.__workPath)
            ok = mU.doExport(filePath,
                             containerList,
                             fmt="mmcif",
                             enforceAscii=True,
                             useCharRefs=True,
                             raiseExceptions=True)

            return ok
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
Example #8
 def __getApi(self, dictLocators, **kwargs):
     """ Return an instance of a dictionary API instance for the input dictionary locator list.
     """
     consolidate = kwargs.get("consolidate", True)
     replaceDefinition = kwargs.get("replaceDefinitions", True)
     verbose = kwargs.get("verbose", True)
     #
     ok = self.__reload(dictLocators,
                        self.__dirPath,
                        useCache=self.__useCache)
     #
     dApi = None
     if ok:
         mU = MarshalUtil()
         containerList = []
         for dictLocator in dictLocators:
             cacheFilePath = os.path.join(
                 self.__dirPath, self.__fileU.getFileName(dictLocator))
             containerList.extend(
                 mU.doImport(cacheFilePath, fmt="mmcif-dict"))
         #
         dApi = DictionaryApi(containerList=containerList,
                              consolidate=consolidate,
                              replaceDefinition=replaceDefinition,
                              verbose=verbose)
     return dApi
Example #9
 def testSubsetBuildMoleculeCacheFiltered(self):
     """Test construction of a filtered selection of chemical component definitions."""
     mU = MarshalUtil()
     fD = mU.doImport(self.__missedIdsPath, fmt="json")
     filterIdD = {ccId: True for ccId in fD["filteredIdList"]}
     self.__testBuildMoleculeCacheFiles(filterIdD=filterIdD,
                                        ccFileNamePrefix="cc-filtered")
Example #10
    def readDefaultDataTypeMap(self, locator, dataTyping="ANY"):
        """Read data file containing application default data type mapping

              data_rcsb_data_type_map
                loop_
                _pdbx_data_type_application_map.application_name
                _pdbx_data_type_application_map.type_code
                _pdbx_data_type_application_map.app_type_code
                _pdbx_data_type_application_map.app_precision_default
                _pdbx_data_type_application_map.app_width_default
                # .... type mapping data ...

        Return (dict):  map[cifType] -> appType, width, precision
                    mapD['cif_type_code'] -> ['application_name', 'app_type_code', 'app_precision_default', 'app_width_default', 'type_code']
        """
        try:
            #
            mapD = {}
            mU = MarshalUtil(workPath=self.__workPath)
            containerList = mU.doImport(locator, fmt="mmcif", enforceAscii=True, useCharRefs=True, raiseExceptions=True)

            for container in containerList:
                if container.getName() == "rcsb_data_type_map":
                    catObj = container.getObj("pdbx_data_type_application_map")
                    for ii in range(catObj.getRowCount()):
                        dD = catObj.getRowAttributeDict(ii)
                        if dD["application_name"] == dataTyping:
                            mapD[dD["type_code"]] = {k: dD[k] for k in ["app_type_code", "application_name", "type_code"]}
                            mapD[dD["type_code"]].update({k: int(dD[k]) for k in ["app_precision_default", "app_width_default"]})
            return mapD
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return {}
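
The category scan above reduces to a dictionary build over rows of pdbx_data_type_application_map. A plain-dict sketch of the same transform; the single row is illustrative, not real mapping data.

 rows = [{"application_name": "ANY", "type_code": "code", "app_type_code": "VARCHAR",
          "app_precision_default": "0", "app_width_default": "80"}]
 dataTyping = "ANY"
 mapD = {}
 for dD in rows:
     if dD["application_name"] == dataTyping:
         mapD[dD["type_code"]] = {k: dD[k] for k in ["app_type_code", "application_name", "type_code"]}
         mapD[dD["type_code"]].update({k: int(dD[k]) for k in ["app_precision_default", "app_width_default"]})
 assert mapD["code"]["app_width_default"] == 80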
Example #11
 def __parseFasta(self,
                  fastaPath,
                  taxonPath,
                  cachePath,
                  dirPath,
                  addTaxonomy=False):
     # input paths
     chemblTargetRawPath = os.path.join(dirPath, "chembl_targets_raw.fa.gz")
     mU = MarshalUtil(workPath=cachePath)
     oD = {}
     uD = {}
     missTax = 0
     taxonL = []
     try:
         if addTaxonomy:
             umP = UniProtIdMappingProvider(cachePath)
             umP.reload(useCache=True)
         #
         fD = mU.doImport(chemblTargetRawPath,
                          fmt="fasta",
                          commentStyle="default")
         #
         for seqId, sD in fD.items():
             chemblId = seqId.strip().split(" ")[0].strip()
             unpId = seqId[seqId.find("[") + 1:seqId.find("]")]
             seq = sD["sequence"]
             cD = {
                 "sequence": seq,
                 "uniprotId": unpId,
                 "chemblId": chemblId
             }
             if addTaxonomy:
                 taxId = umP.getMappedId(unpId, mapName="NCBI-taxon")
                 cD["taxId"] = taxId if taxId else -1
                 if not taxId:
                     missTax += 1
             #
             seqId = ""
             cL = []
             for k, v in cD.items():
                 if k in ["sequence"]:
                     continue
                 cL.append(str(v))
                 cL.append(str(k))
             seqId = "|".join(cL)
             oD[seqId] = cD
             if addTaxonomy:
                 taxonL.append("%s\t%s" % (seqId, taxId))
             #
             uD.setdefault(unpId, []).append(chemblId)
         #
         ok1 = mU.doExport(fastaPath, oD, fmt="fasta", makeComment=True)
         ok3 = True
         if addTaxonomy:
             ok3 = mU.doExport(taxonPath, taxonL, fmt="list")
         return ok1 & ok3
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     #
     return False
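
The header parsing above assumes each ChEMBL FASTA comment line leads with the ChEMBL identifier and carries the UniProt accession in square brackets. A focused sketch of that slice logic; the identifiers are illustrative.

 seqId = "CHEMBL2074 Epidermal growth factor receptor [P00533]"
 chemblId = seqId.strip().split(" ")[0].strip()
 unpId = seqId[seqId.find("[") + 1:seqId.find("]")]
 assert (chemblId, unpId) == ("CHEMBL2074", "P00533")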
Example #12
    def getCCDefFile(self, ccFilePath, molBuildType="model-xyz", suppressHydrogens=False):
        """Fetch the molecule definition (ccPath) and build OE molecules
        for comparison.

        """
        #
        mU = MarshalUtil(workPath=self.__workPath)
        rdCcObjL = mU.doImport(ccFilePath, fmt="mmcif")
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        ccId = oemf.setChemCompDef(rdCcObjL[0])
        oemf.build(molBuildType=molBuildType)

        if self.__verbose:
            logger.info("  CCId               = %s", ccId)
            logger.info("  Title              = %s", oemf.getTitle())
            logger.info("  SMILES             = %s", oemf.getCanSMILES())
            logger.info("  SMILES (stereo)    = %s", oemf.getIsoSMILES())
            logger.info("  Formula (Hill)     = %s", oemf.getFormula())
            logger.info("  InChI key          = %s", oemf.getInChIKey())
            logger.info("  InChI              = %s", oemf.getInChI())

        fD = {"Formula": oemf.getFormula(), "SMILES": oemf.getCanSMILES(), "SMILES_STEREO": oemf.getIsoSMILES(), "InChI": oemf.getInChI(), "InChIKey": oemf.getInChIKey()}

        if suppressHydrogens:
            tMol = oemf.getGraphMolSuppressH()
        else:
            tMol = oemf.getMol()

        fD["OEMOL"] = tMol
        fD["xyz"] = oemf.getAtomDetails(xyzType="model")

        return (ccId, tMol, fD)
Example #13
class ValidationReportReaderTests(unittest.TestCase):
    def setUp(self):
        self.__mU = MarshalUtil()
        self.__dirPath = os.path.join(HERE, "test-data")
        self.__workPath = os.path.join(HERE, "test-output")
        self.__exampleFileXray = os.path.join(self.__dirPath,
                                              "3rer_validation.xml")
        self.__cifFileXray = os.path.join(self.__workPath,
                                          "3rer_validation.cif")
        #
        self.__exampleFileNmr = os.path.join(self.__dirPath,
                                             "6drg_validation.xml")
        self.__cifFileNmr = os.path.join(self.__workPath,
                                         "6drg_validation.cif")
        #
        self.__exampleFileEm = os.path.join(self.__dirPath,
                                            "5a32_validation.xml")
        self.__cifFileEm = os.path.join(self.__workPath, "5a32_validation.cif")
        #
        self.__dictionaryMapPath = os.path.join(HERE, "test-data",
                                                "vrpt_dictmap_v4.json")
        self.__dictionaryMap = self.__mU.doImport(self.__dictionaryMapPath,
                                                  fmt="json")

    def tearDown(self):
        pass

    def testReadXrayValidationReport(self):
        vrr = ValidationReportReader(self.__dictionaryMap)
        xrt = self.__mU.doImport(self.__exampleFileXray, fmt="xml")
        cL = vrr.toCif(xrt)
        ok = self.__mU.doExport(self.__cifFileXray, cL, fmt="mmcif")
        self.assertTrue(ok)

    def testReadNmrValidationReport(self):
        vrr = ValidationReportReader(self.__dictionaryMap)
        xrt = self.__mU.doImport(self.__exampleFileNmr, fmt="xml")
        cL = vrr.toCif(xrt)
        ok = self.__mU.doExport(self.__cifFileNmr, cL, fmt="mmcif")
        self.assertTrue(ok)

    def testReadEmValidationReport(self):
        vrr = ValidationReportReader(self.__dictionaryMap)
        xrt = self.__mU.doImport(self.__exampleFileEm, fmt="xml")
        cL = vrr.toCif(xrt)
        ok = self.__mU.doExport(self.__cifFileEm, cL, fmt="mmcif")
        self.assertTrue(ok)
Example #14
    def jsonSchemaCompare(self,
                          databaseName,
                          collectionName,
                          encodingType,
                          level,
                          extraOpts=None):
        """Compare computed JSON schema defintion with current source/cached version.

        Args:
            databaseName (str): schema name
            collectionName (str): collection name
            encodingType (str): schema data type conventions (JSON|BSON)
            level (str): metadata level (min|full)
            extraOpts (str): extra schema construction options

        Returns:
            (str): path to the difference file or None
        """
        mU = MarshalUtil(workPath=self.__workPath)
        schemaDiffPath = os.path.join(self.__cachePath, "schema_diff")
        mU.mkdir(schemaDiffPath)
        schemaLocator = self.__getJsonSchemaLocator(databaseName,
                                                    collectionName,
                                                    encodingType, level)
        fn = self.__fileU.getFileName(schemaLocator)
        schemaPath = os.path.join(self.__jsonSchemaCachePath, fn)
        #
        sD = self.makeSchema(databaseName,
                             collectionName,
                             encodingType=encodingType,
                             level=level,
                             saveSchema=False,
                             extraOpts=extraOpts)
        v2 = self.__getSchemaVersion(sD)
        # ----
        # tPath = os.path.join(self.__jsonSchemaCachePath, self.__fileU.getFileName(schemaPath) + "-test")
        # logger.info("Exporting json schema to %s", tPath)
        # mU.doExport(tPath, sD, fmt="json", indent=3)
        # ----
        #
        sDCache = mU.doImport(schemaPath, fmt="json")
        v1 = self.__getSchemaVersion(sDCache)
        if not v1:
            logger.error("no version for %s - %s %s", schemaLocator,
                         databaseName, collectionName)
        #
        numDiff, difD = self.schemaCompare(sDCache, sD)
        # jD = diff(sDCache, sD, marshal=True, syntax="explicit")
        diffPath = None
        if numDiff:
            logger.debug("diff for %s %s %s %s = \n%s", databaseName,
                         collectionName, encodingType, level,
                         pprint.pformat(difD, indent=3, width=100))
            bn, _ = os.path.splitext(fn)
            diffPath = os.path.join(schemaDiffPath,
                                    bn + "-" + v1 + "-" + v2 + "-diff.json")
            mU.doExport(diffPath, difD, fmt="json", indent=3)

        return diffPath
Example #15
    def getSchemaInfo(self, databaseName, dataTyping="ANY"):
        """Convenience method to return essential schema details for the input repository content type.

        Args:
            databaseName (str): schema name  (e.g. pdbx, bird, chem_comp, ...)
            dataTyping (str, optional): Application name for the target schema (e.g. ANY, SQL, ...)

        Returns:
            tuple: SchemaDefAccess(object), target database name, target collection name list, primary index attribute list


        """
        sd = None
        dbName = None
        collectionNameList = []
        docIndexD = {}
        try:
            mU = MarshalUtil(workPath=self.__workPath)
            schemaLocator = self.__getSchemaDefLocator(databaseName,
                                                       dataTyping=dataTyping)
            if self.__rebuildFlag:
                filePath = os.path.join(
                    self.__schemaCachePath,
                    self.__fileU.getFileName(schemaLocator))
                self.makeSchemaDef(databaseName,
                                   dataTyping=dataTyping,
                                   saveSchema=True)
            else:
                filePath = self.__reload(schemaLocator,
                                         self.__schemaCachePath,
                                         useCache=self.__useCache)

            if not filePath:
                logger.error("Unable to recover schema %s (%s)", databaseName,
                             dataTyping)
            logger.debug("ContentType %r dataTyping %r schemaLocator %r",
                         databaseName, dataTyping, schemaLocator)
            schemaDef = mU.doImport(filePath, fmt="json")
            if schemaDef:
                logger.debug(
                    "Using cached schema definition for %s application %s",
                    databaseName, dataTyping)
                sd = SchemaDefAccess(schemaDef)
                if sd:
                    dbName = sd.getDatabaseName()
                    collectionInfoList = sd.getCollectionInfo()
                    logger.debug("Schema %s database name %s collections %r",
                                 databaseName, dbName, collectionInfoList)
                    for cd in collectionInfoList:
                        collectionName = cd["NAME"]
                        collectionNameList.append(collectionName)
                        docIndexD[collectionName] = sd.getDocumentIndices(
                            collectionName)

        except Exception as e:
            logger.exception("Retreiving schema %s for %s failing with %s",
                             databaseName, dataTyping, str(e))

        return sd, dbName, collectionNameList, docIndexD
Example #16
 def fetch(self):
     try:
         provenanceFileCachePath = self.__reload(self.__provenanceLocator, self.__provenanceCachePath, useCache=self.__useCache)
         mU = MarshalUtil(workPath=self.__workPath)
         return mU.doImport(provenanceFileCachePath, fmt="json")
     except Exception as e:
         logger.exception("Failed retreiving provenance with %s", str(e))
     return {}
Example #17
 def __getRegistry(self, registryPath):
     """"""
     try:
         mU = MarshalUtil()
         obj = mU.doImport(registryPath, fmt="json")
         return obj["mmcif_dictionary_registry"]
     except Exception as e:
         logger.exception("Failing for %r with %s", registryPath, str(e))
Example #18
 def testGetCollectionObjects(self):
     sP = os.path.join(
         self.__cachePath, "json_schema_definitions",
         "json-full-db-pdbx_core-col-pdbx_core_nonpolymer_entity.json")
     mU = MarshalUtil()
     sD = mU.doImport(sP, fmt="json")
     logger.info("kys %r", list(sD.keys()))
     for ky in sD["properties"]:
         logger.info(" - %s", ky)
Example #19
 def fetchModelIndex(self):
     mD = {}
     try:
         mU = MarshalUtil(workPath=self.__cachePath)
         fp = self.__getModelIndexPath()
         mD = mU.doImport(fp, fmt="json")
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return mD
Example #20
 def __readSiftsSummaryFile(self, filePath, rowFormat="dict"):
     """Read input SIFTS summary file and return a list of dictionaries."""
     try:
         mU = MarshalUtil()
         cL = mU.doImport(filePath, fmt="csv", rowFormat=rowFormat)
         logger.debug("Container list %d", len(cL))
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return cL
Example #21
 def fetchPathList(self):
     pathList = []
     fp = self.getIndexFilePath()
     try:
         mU = MarshalUtil()
         pathList = mU.doImport(fp, fmt="json")
     except Exception as e:
         logger.info("Failing for %r with %s", fp, str(e))
     return pathList
Example #22
 def __getItemCounts(self, itemCoverageFilePath):
     #
     mU = MarshalUtil()
     rowList = mU.doImport(itemCoverageFilePath, fmt="tdd", rowFormat="list")
     itemCountD = {}
     for row in rowList:
         itemCountD[row[0]] = int(row[1])
     #
     return itemCountD
Example #23
 def __getSearchResults(self):
     """Read search results and convert to a chemical component dictionary."""
     fp = self.getRawResultFilePath()
     mU = MarshalUtil(workPath=self.__cachePath)
     rawL = mU.doImport(fp, fmt="json")
     rD = {}
     for cD in rawL:
         rD.setdefault(cD["ccId"], []).append(cD)
     return rD
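
The grouping step above is the standard dict.setdefault bucketing idiom. A tiny self-contained sketch with made-up search records:

 rawL = [{"ccId": "ATP", "score": 0.9}, {"ccId": "ATP", "score": 0.7}, {"ccId": "GTP", "score": 0.8}]
 rD = {}
 for cD in rawL:
     rD.setdefault(cD["ccId"], []).append(cD)
 assert len(rD["ATP"]) == 2 and len(rD["GTP"]) == 1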
Example #24
 def __checkAssembledModels(self, assembleModelPath):
     catNameL = [
         "pdbx_chem_comp_model",
         "pdbx_chem_comp_model_atom",
         "pdbx_chem_comp_model_bond",
         "pdbx_chem_comp_model_descriptor",
         "pdbx_chem_comp_model_reference",
         "pdbx_chem_comp_model_feature",
         "pdbx_chem_comp_model_audit",
     ]
     mU = MarshalUtil(workPath=self.__cachePath)
     dataContainerL = mU.doImport(assembleModelPath, fmt="mmcif")
     logger.info("Read %d data containers", len(dataContainerL))
     rD = {}
     cnD = {}
     for dataContainer in dataContainerL:
         nm = dataContainer.getName()
         logger.debug("datacontainer %r", nm)
         if nm in cnD:
             logger.info("Duplicate container id %r", nm)
         cnD[nm] = True
         #
         pId = self.__parseId(nm)[0]
         cObj = dataContainer.getObj("pdbx_chem_comp_model")
         modelId = cObj.getValue("id", 0)
         if modelId != nm:
             logger.error("modelId %r datablock %r", modelId, nm)
         #
         tD = {}
         for catName in catNameL:
             cObj = dataContainer.getObj(catName)
             nRows = cObj.getRowCount()
             tD[catName] = nRows
         cObj = dataContainer.getObj("pdbx_chem_comp_model_feature")
         skip = False
         for ii in range(cObj.getRowCount()):
             fN = cObj.getValue("feature_name", ii)
             fV = cObj.getValue("feature_value", ii)
             if fN == "heavy_atoms_only" and fV == "Y":
                 skip = True
                 break
         if not skip:
             rD.setdefault(pId, []).append(tD)
     #
     for pId, tDL in rD.items():
         for catName in catNameL:
             minV = 100000
             maxV = -1
             for tD in tDL:
                 minV = min(minV, tD[catName])
                 maxV = max(maxV, tD[catName])
             if maxV - minV > 2 and catName not in [
                     "pdbx_chem_comp_model_feature"
             ]:
                 logger.error("%s %s row count inconsistency %d %d", pId,
                              catName, minV, maxV)
Example #25
 def testReadUrlTarfileFail(self):
     """Test the case to read URL target and extract a member (failing case)"""
     try:
         mU = MarshalUtil(workPath=self.__workPath)
         rL = mU.doImport(self.__urlTargetBad, fmt="tdd", rowFormat="list", tarMember="names.dmp")
         logger.info("Return is %r", rL)
         self.assertEqual(len(rL), 0)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #26
 def readTestFile(self, filePath):
     """Read input and return a list of dictionaries."""
     cL = []
     try:
         mU = MarshalUtil()
         cL = mU.doImport(filePath, fmt="tdd")
         logger.debug("Container list %d", len(cL))
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return cL
Example #27
 def update(self, provD):
     ok = False
     try:
         provenanceFileCachePath = self.__reload(self.__provenanceLocator, self.__provenanceCachePath, useCache=self.__useCache)
         mU = MarshalUtil(workPath=self.__workPath)
         tD = mU.doImport(provenanceFileCachePath, fmt="json")
         tD.update(provD)
         ok = mU.doExport(provenanceFileCachePath, tD, fmt="json")
     except Exception as e:
         logger.exception("Failed updating provenance with %s", str(e))
     return ok
Example #28
    def __rebuildCache(self, urlTargetIsoLtwa, dirPath, useCache):
        """Rebuild the cache of ISO abbreviation term data

        Args:
            urlTargetIsoLtwa (str): URL for ISO4 LTWA title word abbreviations
            dirPath (str):  cache path
            useCache (bool):  flag to use cached files

        Returns:
            tuple: (dict) title word abbreviations
                   (dict) language conflict dictionary
                   (list) multi-word abbreviation targets

        Notes:
            ISO source file (tab delimited UTF-16LE) is maintained at the ISSN site -
            https://www.issn.org/wp-content/uploads/2013/09/LTWA_20160915.txt
        """
        aD = {}
        mU = MarshalUtil(workPath=dirPath)
        fmt = "json"
        ext = fmt if fmt == "json" else "pic"
        isoLtwaNamePath = os.path.join(dirPath, "iso-ltwa.%s" % ext)
        logger.debug("Using cache data path %s", dirPath)
        mU.mkdir(dirPath)
        if not useCache:
            for fp in [isoLtwaNamePath]:
                try:
                    os.remove(fp)
                except Exception:
                    pass
        #
        if useCache and mU.exists(isoLtwaNamePath):
            aD = mU.doImport(isoLtwaNamePath, fmt=fmt)
            logger.debug("Abbreviation name length %d", len(aD["abbrev"]))
        else:
            # ------
            fU = FileUtil()
            logger.info("Fetch data from source %s in %s", urlTargetIsoLtwa,
                        dirPath)
            fp = os.path.join(dirPath, fU.getFileName(urlTargetIsoLtwa))
            ok = fU.get(urlTargetIsoLtwa, fp)
            aD = self.__getLtwaTerms(dirPath, fp)
            ok = mU.doExport(isoLtwaNamePath, aD, fmt=fmt)
            logger.debug("abbrevD keys %r", list(aD.keys()))
            logger.debug("Caching %d ISO LTWA in %s status %r",
                         len(aD["abbrev"]), isoLtwaNamePath, ok)
        #
        abbrevD = aD["abbrev"] if "abbrev" in aD else {}
        conflictD = aD["conflicts"] if "conflicts" in aD else {}
        multiWordTermL = aD[
            "multi_word_abbrev"] if "multi_word_abbrev" in aD else []
        #
        return abbrevD, conflictD, multiWordTermL
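
When useCache is false, the provider above deletes stale cache artifacts before refetching. A small stdlib sketch of that invalidation step, factored as a hypothetical helper:

 import contextlib
 import os

 def clearCacheFiles(paths):
     # Remove stale cache artifacts, ignoring files that are already gone
     for fp in paths:
         with contextlib.suppress(OSError):
             os.remove(fp)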
Example #29
    def __reload(self, urlTarget, dirPath, useCache=True):
        """Reload local cache of mapping resources to support validation report reader and translator.

        Args:
            urlTarget (str): URL for the schema mapping file
            dirPath (str): path to the directory containing cache files
            useCache (bool, optional): flag to use cached files. Defaults to True.

        Returns:
            (dict): schema mapping dictionary
        """
        mapD = {}
        #
        mU = MarshalUtil()
        fU = FileUtil()
        fn = fU.getFileName(urlTarget)
        mappingFilePath = os.path.join(dirPath, fn)
        mU.mkdir(dirPath)
        #
        # if not useCache:
        #     for fp in [mappingFilePath]:
        #         try:
        #             os.remove(fp)
        #         except Exception:
        #             pass
        # #
        logger.debug("Loading validation mapping data in %s (useCache %r)", fn,
                     useCache)
        if useCache and fU.exists(mappingFilePath):
            mapD = mU.doImport(mappingFilePath, fmt="json")
        else:
            logger.info("Fetching url %s to resource file %s", urlTarget,
                        mappingFilePath)
            tS = uuid.uuid4().hex
            tP = os.path.join(dirPath, "._" + tS)
            ok = fU.get(urlTarget, tP)
            if ok:
                mapD = mU.doImport(tP, fmt="json")
                os.replace(tP, mappingFilePath)
        return mapD
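
The fetch path above downloads to a uniquely named temporary file and installs it with os.replace(), so a concurrent reader never sees a partial download. A stdlib sketch of the idiom, with urllib.request standing in for FileUtil.get():

 import os
 import urllib.request
 import uuid

 def atomicFetch(url, destPath):
     # Download beside the destination, then atomically rename into place
     tmpPath = os.path.join(os.path.dirname(destPath) or ".", "._" + uuid.uuid4().hex)
     try:
         urllib.request.urlretrieve(url, tmpPath)
         os.replace(tmpPath, destPath)  # atomic within one filesystem on POSIX
         return True
     except OSError:
         return False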
Example #30
    def readIndex(self):
        try:
            mU = MarshalUtil()
            if not mU.exists(self._indexFilePath):
                return False
            indexObj = mU.doImport(self._indexFilePath, fmt=self.__fmt)
            if indexObj is not None and len(indexObj) > 0:
                self._rL.extend(indexObj)
            return True
        except Exception as e:
            logger.error("Failing with %s", str(e))

        return False