Exemplo n.º 1
0
    def __buildCategoryDefinition(self, name, description, keyAttributeNames,
                                  examples, contexts):
        """Construct an attribute definition from input attribute dictionary
            containing metadata extracted from the XML schema, and from the
            input schema name mapping dictionary.

        Args:
            name (str): category name
            description (str): category description
            keyAttributeNames (list): key attribute names
            examples (list): category examples
            contexts (list): category contexts

        Returns:
            Definition container (object):

        """
        defC = DefinitionContainer(name)
        #
        dc = DataCategory(
            "category",
            attributeNameList=["id", "description", "mandatory_code"])
        dc.append([name, description, "no"])
        defC.append(dc)
        #
        dc = DataCategory("category_key", attributeNameList=["name"])
        for keyAttributeName in keyAttributeNames:
            keyItemName = CifName.itemName(name, keyAttributeName)
            dc.append([keyItemName])
        defC.append(dc)

        dc = DataCategory("category_group", attributeNameList=["id"])
        dc.append(["inclusive_group"])
        dc.append(["validation_report_group"])
        defC.append(dc)
        # pdbx_category_context
        dc = DataCategory("pdbx_category_context",
                          attributeNameList=["category_id", "type"])
        for cType in contexts:
            dc.append([name, cType])
        defC.append(dc)
        #
        dc = DataCategory("category_examples",
                          attributeNameList=["detail", "case"])
        for example in examples:
            dc.append([".", example])
        defC.append(dc)

        return defC
Exemplo n.º 2
0
    def test_gen_ddlm(self, in_tmpdir, test_files):
        myIo = IoAdapterPy()
        containerList = myIo.readFile(
            inputFilePath=str(test_files / 'mmcif_pdbx_v5_next.dic'))
        dApi = DictionaryApi(containerList=containerList, consolidate=True)
        parentD = dApi.getParentDictionary()
        #
        oCList = []
        dDef = DataContainer('mmcif_pdbx_ddlm_auto')
        dc = DataCategory("dictionary")
        dc.appendAttribute("title")
        dc.appendAttribute("class")
        dc.appendAttribute("version")
        dc.appendAttribute("date")
        dc.appendAttribute("ddl_conformance")
        dc.appendAttribute("text")
        dc.append([
            'mmcif_pdbx_ddlm_auto', 'Instance', 'latest', '2018-03-09',
            'ddlm best effort',
            'Software converted PDBx dictionary using DDLm semantics'
        ])
        dDef.append(dc)
        oCList.append(dDef)

        catIdx = dApi.getCategoryIndex()
        for catName in sorted(catIdx.keys()):
            attNameList = catIdx[catName]
            # created definition container -
            cDef = DefinitionContainer(catName)
            oCList.append(cDef)
            #
            dc = DataCategory("definition")
            dc.appendAttribute("id")
            dc.appendAttribute("scope")
            dc.appendAttribute("class")
            dc.appendAttribute("update")
            dc.append([catName, "Category", "Loop", "2018-03-09"])
            cDef.append(dc)
            val = dApi.getCategoryDescription(category=catName)
            dc = DataCategory("description")
            dc.appendAttribute("text")
            dc.append([val])
            cDef.append(dc)
            #
            dc = DataCategory("name")
            dc.appendAttribute("category_id")
            dc.appendAttribute("object_id")

            valList = dApi.getCategoryGroupList(category=catName)
            pcg = catName
            for val in valList:
                if val != 'inclusive_group':
                    pcg = val
                    break
            dc.append([catName, pcg])
            cDef.append(dc)

            valList = dApi.getCategoryKeyList(category=catName)
            if len(valList) < 1:
                print("Missing caegory key for category %s\n" % catName)
            else:
                dc = DataCategory("category")
                dc.appendAttribute("key_id")
                kItemName = CifName.itemName(catName, "synthetic_key")
                dc.append([kItemName])
                cDef.append(dc)

                iDef = DefinitionContainer(kItemName)
                self._makeKeyItem(catName, "synthetic_key", valList, iDef)
                oCList.append(iDef)

            for attName in attNameList:
                itemName = CifName.itemName(catName, attName)
                iDef = DefinitionContainer(itemName)

                oCList.append(iDef)

                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([itemName, "Item", "Single", "2013-08-22"])
                iDef.append(dc)
                #
                val = dApi.getDescription(category=catName, attribute=attName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                iDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")
                #
                if itemName in parentD:
                    dc.appendAttribute("linked_item_id")
                    dc.append([catName, attName, parentD[itemName][0]])
                else:
                    dc.append([catName, attName])
                iDef.append(dc)
                #
                #
                aliasList = dApi.getItemAliasList(category=catName,
                                                  attribute=attName)
                if len(aliasList) > 0:
                    dc = DataCategory("alias")
                    dc.appendAttribute("definition_id")
                    for alias in aliasList:
                        dc.append([alias[0]])
                    iDef.append(dc)

                enList = dApi.getEnumListAltWithDetail(category=catName,
                                                       attribute=attName)

                tC = dApi.getTypeCode(category=catName, attribute=attName)
                tcontainer = 'Single'
                purpose = 'Describe'
                source = 'Recorded'
                contents = 'Text'
                #
                if tC is None:
                    self.__lfh.write("Missing data type attribute %s\n" %
                                     attName)
                elif tC in [
                        'code', 'atcode', 'name', 'idname', 'symop', 'fax',
                        'phone', 'email', 'code30', 'ec-type'
                ]:
                    purpose = 'Encode'
                    contents = 'Text'
                    source = 'Assigned'
                elif tC in ['ucode']:
                    purpose = 'Encode'
                    contents = 'Code'
                    source = 'Assigned'
                elif tC in ['line', 'uline', 'text']:
                    purpose = 'Describe'
                    source = 'Recorded'
                    contents = 'Text'
                elif tC in ['int']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Integer'
                elif tC in ['int-range']:
                    purpose = 'Number'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC in ['float']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Real'
                elif tC in ['float-range']:
                    purpose = 'Measurand'
                    source = 'Recorded'
                    contents = 'Range'
                elif tC.startswith('yyyy'):
                    source = 'Assigned'
                    contents = 'Date'
                    purpose = 'Describe'

                if len(enList) > 0:
                    purpose = 'State'

                dc = DataCategory("type")
                dc.appendAttribute("purpose")
                dc.appendAttribute("source")
                dc.appendAttribute("contents")
                dc.appendAttribute("container")
                dc.append([purpose, source, contents, tcontainer])
                iDef.append(dc)
                #
                if (len(enList) > 0):
                    dc = DataCategory("enumeration_set")
                    dc.appendAttribute("state")
                    dc.appendAttribute("detail")
                    for en in enList:
                        dc.append([en[0], en[1]])
                    iDef.append(dc)

                dfv = dApi.getDefaultValue(category=catName, attribute=attName)
                bvList = dApi.getBoundaryList(category=catName,
                                              attribute=attName)
                if (((dfv is not None) and (dfv not in ['?', '.']))
                        or len(bvList) > 0):
                    row = []
                    dc = DataCategory("enumeration")
                    if dfv is not None:
                        dc.appendAttribute("default")
                        row.append(dfv)
                    if len(bvList) > 0:
                        dc.appendAttribute("range")
                        mminVp = -1000000
                        mmaxVp = 10000000
                        mminV = mmaxVp
                        mmaxV = mminVp
                        for bv in bvList:
                            minV = float(bv[0]) if bv[0] != '.' else mminVp
                            maxV = float(bv[1]) if bv[1] != '.' else mmaxVp
                            mminV = min(mminV, minV)
                            mmaxV = max(mmaxV, maxV)
                        if mminV == mminVp:
                            mminV = ''
                        if mmaxV == mmaxVp:
                            mmaxV = ''
                        row.append(str(mminV) + ":" + str(mmaxV))

                    dc.append(row)
                    iDef.append(dc)

        myIo.writeFile(outputFilePath="mmcif_pdbx_ddlm_auto.dic",
                       containerList=oCList)
Exemplo n.º 3
0
    def testGenDDLm(self):
        """Generating alternative DDLm metadata format. (starting point)"""
        try:
            myIo = IoAdapterPy(self.__verbose, self.__lfh)
            self.__containerList = myIo.readFile(
                inputFilePath=self.__pathPdbxDictionary)
            dApi = DictionaryApi(containerList=self.__containerList,
                                 consolidate=True,
                                 verbose=self.__verbose)
            parentD = dApi.getParentDictionary()
            #
            oCList = []
            dDef = DataContainer("mmcif_pdbx_ddlm_auto")
            dc = DataCategory("dictionary")
            dc.appendAttribute("title")
            dc.appendAttribute("class")
            dc.appendAttribute("version")
            dc.appendAttribute("date")
            dc.appendAttribute("ddl_conformance")
            dc.appendAttribute("text")
            dc.append([
                "mmcif_pdbx_ddlm_auto", "Instance", "latest", "2018-03-09",
                "ddlm best effort",
                "Software converted PDBx dictionary using DDLm semantics"
            ])
            dDef.append(dc)
            oCList.append(dDef)

            catIdx = dApi.getCategoryIndex()
            for catName in sorted(catIdx.keys()):
                attNameList = catIdx[catName]
                # created definition container -
                cDef = DefinitionContainer(catName)
                oCList.append(cDef)
                #
                dc = DataCategory("definition")
                dc.appendAttribute("id")
                dc.appendAttribute("scope")
                dc.appendAttribute("class")
                dc.appendAttribute("update")
                dc.append([catName, "Category", "Loop", "2018-03-09"])
                cDef.append(dc)
                val = dApi.getCategoryDescription(category=catName)
                dc = DataCategory("description")
                dc.appendAttribute("text")
                dc.append([val])
                cDef.append(dc)
                #
                dc = DataCategory("name")
                dc.appendAttribute("category_id")
                dc.appendAttribute("object_id")

                valList = dApi.getCategoryGroupList(category=catName)
                pcg = catName
                for val in valList:
                    if val != "inclusive_group":
                        pcg = val
                        break
                dc.append([catName, pcg])
                cDef.append(dc)

                valList = dApi.getCategoryKeyList(category=catName)
                if not valList:
                    self.__lfh.write("Missing caegory key for category %s\n" %
                                     catName)
                else:
                    dc = DataCategory("category")
                    dc.appendAttribute("key_id")
                    kItemName = CifName.itemName(catName, "synthetic_key")
                    dc.append([kItemName])
                    cDef.append(dc)

                    iDef = DefinitionContainer(kItemName)
                    self.__makeKeyItem(catName, "synthetic_key", valList, iDef)
                    oCList.append(iDef)

                for attName in attNameList:
                    itemName = CifName.itemName(catName, attName)
                    iDef = DefinitionContainer(itemName)

                    oCList.append(iDef)

                    #
                    dc = DataCategory("definition")
                    dc.appendAttribute("id")
                    dc.appendAttribute("scope")
                    dc.appendAttribute("class")
                    dc.appendAttribute("update")
                    dc.append([itemName, "Item", "Single", "2013-08-22"])
                    iDef.append(dc)
                    #
                    val = dApi.getDescription(category=catName,
                                              attribute=attName)
                    dc = DataCategory("description")
                    dc.appendAttribute("text")
                    dc.append([val])
                    iDef.append(dc)
                    #
                    dc = DataCategory("name")
                    dc.appendAttribute("category_id")
                    dc.appendAttribute("object_id")
                    #
                    if itemName in parentD:
                        dc.appendAttribute("linked_item_id")
                        dc.append([catName, attName, parentD[itemName][0]])
                    else:
                        dc.append([catName, attName])
                    iDef.append(dc)
                    #
                    #
                    aliasList = dApi.getItemAliasList(category=catName,
                                                      attribute=attName)
                    if aliasList:
                        dc = DataCategory("alias")
                        dc.appendAttribute("definition_id")
                        for alias in aliasList:
                            dc.append([alias[0]])
                        iDef.append(dc)

                    enList = dApi.getEnumListAltWithDetail(category=catName,
                                                           attribute=attName)

                    tC = dApi.getTypeCode(category=catName, attribute=attName)
                    tcontainer = "Single"
                    purpose = "Describe"
                    source = "Recorded"
                    contents = "Text"
                    #
                    if tC is None:
                        self.__lfh.write("Missing data type attribute %s\n" %
                                         attName)
                    elif tC in [
                            "code", "atcode", "name", "idname", "symop", "fax",
                            "phone", "email", "code30", "ec-type"
                    ]:
                        purpose = "Encode"
                        contents = "Text"
                        source = "Assigned"
                    elif tC in ["ucode"]:
                        purpose = "Encode"
                        contents = "Code"
                        source = "Assigned"
                    elif tC in ["line", "uline", "text"]:
                        purpose = "Describe"
                        source = "Recorded"
                        contents = "Text"
                    elif tC in ["int"]:
                        purpose = "Number"
                        source = "Recorded"
                        contents = "Integer"
                    elif tC in ["int-range"]:
                        purpose = "Number"
                        source = "Recorded"
                        contents = "Range"
                    elif tC in ["float"]:
                        purpose = "Measurand"
                        source = "Recorded"
                        contents = "Real"
                    elif tC in ["float-range"]:
                        purpose = "Measurand"
                        source = "Recorded"
                        contents = "Range"
                    elif tC.startswith("yyyy"):
                        source = "Assigned"
                        contents = "Date"
                        purpose = "Describe"

                    if enList:
                        purpose = "State"

                    dc = DataCategory("type")
                    dc.appendAttribute("purpose")
                    dc.appendAttribute("source")
                    dc.appendAttribute("contents")
                    dc.appendAttribute("container")
                    dc.append([purpose, source, contents, tcontainer])
                    iDef.append(dc)
                    #
                    if enList:
                        dc = DataCategory("enumeration_set")
                        dc.appendAttribute("state")
                        dc.appendAttribute("detail")
                        for en in enList:
                            dc.append([en[0], en[1]])
                        iDef.append(dc)

                    dfv = dApi.getDefaultValue(category=catName,
                                               attribute=attName)
                    bvList = dApi.getBoundaryList(category=catName,
                                                  attribute=attName)
                    if ((dfv is not None) and
                        (dfv not in ["?", "."])) or bvList:
                        row = []
                        dc = DataCategory("enumeration")
                        if dfv is not None:
                            dc.appendAttribute("default")
                            row.append(dfv)
                        if bvList:
                            dc.appendAttribute("range")
                            mminVp = -1000000
                            mmaxVp = 10000000
                            mminV = mmaxVp
                            mmaxV = mminVp
                            for bv in bvList:
                                minV = float(bv[0]) if bv[0] != "." else mminVp
                                maxV = float(bv[1]) if bv[1] != "." else mmaxVp
                                mminV = min(mminV, minV)
                                mmaxV = max(mmaxV, maxV)
                            if mminV == mminVp:
                                mminV = ""
                            if mmaxV == mmaxVp:
                                mmaxV = ""
                            row.append(str(mminV) + ":" + str(mmaxV))

                        dc.append(row)
                        iDef.append(dc)

            myIo.writeFile(outputFilePath=os.path.join(
                HERE, "test-output", "mmcif_pdbx_ddlm_auto.dic"),
                           containerList=oCList)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Exemplo n.º 4
0
    def __buildAttributeDefinition(self, atD, mD):
        """ Construct an attribute definition from input attribute dictionary
            containing metadata extracted from the XML schema, and from the
            input schema name mapping dictionary.
        Args:
            atD (dict): attribute metadata dictionary
            dictionaryMap (dict): mapping details for categories and attributes

        Returns:
                Attribute definition (object)

        """
        #
        atName = atD["name"]
        catName = atD["category"]

        #
        mapAtName = mD["at"] if "at" in mD else atName
        mapCatName = mD["cat"] if "cat" in mD else catName
        pCat = mD["pCat"] if "pCat" in mD else None
        pAt = mD["pAt"] if "pAt" in mD else None
        pType = mD["pType"] if "pType" in mD else None
        #
        itemName = CifName.itemName(mapCatName, mapAtName)
        #
        aliasAtName = atD["aliasName"] if "aliasName" in atD else None
        aliasCatName = atD[
            "aliasCategoryName"] if "aliasCategoryName" in atD else None
        #
        atDescription = atD["description"] if "description" in atD else None
        atDescription = self.__filterDescription(atDescription,
                                                 self.__catMappingD)
        atDescription = self.__filterDescription(atDescription,
                                                 self.__atMappingD)
        #
        mCode = "yes" if atD["mandatory"] == "mandatory" else "no"
        mCode = "yes" if mapAtName == "entry_id" else mCode
        #
        if atD["type"] not in self.__typeMap:
            logger.info("Unmapped type %r", atD["type"])
        #
        atType = self.__typeMap[
            atD["type"]] if atD["type"] in self.__typeMap else "UNKNOWN"
        #
        if atType == "text" and atDescription.find("comma separate") >= 0:
            atType = "alphanum-csv"
        if atType == "text" and "_date" in mapAtName and mapAtName != "report_creation_date":
            atType = "yyyy-mm-dd"
        #
        atType = pType if pType else atType

        if atType == "UNKNOWN":
            logger.info("Missing type mapping for %s %s %s", catName, atName,
                        atD["type"])
        #
        defA = DefinitionContainer(itemName)
        #
        dc = DataCategory("item_description",
                          attributeNameList=["description"])
        dc.append([atDescription])
        defA.append(dc)

        dc = DataCategory(
            "item",
            attributeNameList=["name", "category_id", "mandatory_code"])
        dc.append([itemName, mapCatName, mCode])
        defA.append(dc)
        #
        dc = DataCategory("item_type", attributeNameList=["code"])
        dc.append([atType])
        defA.append(dc)

        dc = DataCategory(
            "item_aliases",
            attributeNameList=["alias_name", "dictionary", "version"])
        dc.append([
            aliasCatName + "." + aliasAtName, self.__schemaPath,
            self.__schemaVersion
        ])
        defA.append(dc)
        #
        # Note - expect boundaries in pairs and 'inclusive' of endpoints
        #
        if "minIncl" in atD and "maxIncl" in atD and atD["minIncl"] and atD[
                "maxIncl"]:
            minB = atD["minIncl"]
            maxB = atD["maxIncl"]
            dc = DataCategory("item_range",
                              attributeNameList=["minimum", "maximum"])
            dc.append([minB, minB])
            dc.append([minB, maxB])
            dc.append([maxB, maxB])
            defA.append(dc)
        else:
            if atType == "float" and ((mapAtName.find("percent_") >= 0) or
                                      (mapAtName.find("percentile_") >= 0)):
                dc = DataCategory("item_range",
                                  attributeNameList=["minimum", "maximum"])
                minB = "0.0"
                maxB = "100.0"
                dc.append([minB, minB])
                dc.append([minB, maxB])
                dc.append([maxB, maxB])
                defA.append(dc)
        #
        if "enum" in atD and isinstance(atD["enum"], list) and atD["enum"]:
            dc = DataCategory("item_enumeration",
                              attributeNameList=["value", "detail"])
            for enumVal in atD["enum"]:
                dc.append([enumVal, "."])
            defA.append(dc)

        # -  add parent link relationships -
        if pCat and pAt:
            dc = DataCategory("item_linked",
                              attributeNameList=["child_name", "parent_name"])
            parentItemName = CifName.itemName(pCat, pAt)
            dc.append([itemName, parentItemName])
            defA.append(dc)
        #
        #
        return defA