Example #1
    def testRowDictInitialization(self):
        """Test case -  Row dictionary initialization of a data category and data block"""
        try:
            rLen = 10
            fn = self.__pathOutputFile5
            attributeNameList = ["a", "b", "c", "d"]
            rowList = [{"a": 1, "b": 2, "c": 3, "d": 4} for i in range(rLen)]
            nameCat = "myCategory"
            #
            # Build a category from dict rows, then append two more rows and
            # extend with the original list: final row count is 2 * rLen + 2.
            curContainer = DataContainer("myblock")
            aCat = DataCategory(nameCat, attributeNameList, rowList)
            aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
            aCat.append({"a": 1, "b": 2, "c": 3, "d": 4})
            aCat.extend(rowList)
            curContainer.append(aCat)
            aCat.renameAttributes({"a": "aa", "b": "bb", "c": "cc", "d": "dd"})
            aCat.setName("renamedCategory")
            #
            # Write, then read back.  Context managers close the handles even
            # if the writer/reader raises (the original leaked them).
            myContainerList = [curContainer]
            with open(fn, "w") as ofh:
                pdbxW = PdbxWriter(ofh)
                pdbxW.write(myContainerList)

            myContainerList = []
            with open(fn, "r") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myContainerList)
            for container in myContainerList:
                for objName in container.getObjNameList():
                    name, aList, rList = container.getObj(objName).get()
                    logger.debug("Recovered data category  %s", name)
                    logger.debug("Attribute list           %r", repr(aList))
                    logger.debug("Row list                 %r", repr(rList))
            self.assertEqual(len(myContainerList), 1)
            self.assertEqual(len(rList), 2 * rLen + 2)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #2
    def testReadSFDataFile(self, reader_paths):
        """Read a structure-factor data file and check f/sig(f) statistics
        over the refln category.

        Returns False when the refln category is absent; otherwise asserts
        the expected number of numeric rows.
        """
        myContainerList = []
        # The with-block closes the handle even on the early `return False`
        # path or a parse error (the original leaked it in both cases).
        with open(str(reader_paths['pathSFDataFile']), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        c0 = myContainerList[0]
        #
        catObj = c0.getObj("refln")
        if catObj is None:
            return False

        #
        # Map lower-cased item names to their column indices.
        #
        itDict = {}
        for idxIt, itName in enumerate(catObj.getItemNameList()):
            itDict[str(itName).lower()] = idxIt
        idf = itDict['_refln.f_meas_au']
        idsigf = itDict['_refln.f_meas_sigma_au']
        minR = 100
        maxR = -1
        sumR = 0
        icount = 0
        for row in catObj.getRowList():
            try:
                f = float(row[idf])
                sigf = float(row[idsigf])
                ratio = sigf / f
                maxR = max(maxR, ratio)
                minR = min(minR, ratio)
                sumR += ratio
                icount += 1
            except Exception:
                # Skip rows with missing / non-numeric values (e.g. '?' or '.').
                continue

        print("f/sig(f) min %f max %f avg %f count %d\n" %
              (minR, maxR, sumR / icount, icount))
        assert icount == 99242
Example #3
    def testReadWriteDataFile(self):
        """Test case - read a pdbx data file and write it back out."""
        try:
            containerList = []
            # Parse the reference data file.
            with open(self.__pathPdbxDataFile, "r") as inputFh:
                PdbxReader(inputFh).read(containerList)

            # Re-emit everything that was read.
            with open(self.__pathOutputFile1, "w") as outputFh:
                PdbxWriter(outputFh).write(containerList)

            self.assertEqual(len(containerList), 1)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #4
    def test_single_row_file(self, rw_data):
        """Round-trip a single-row data file and inspect the symmetry category."""
        dataList = []
        with open(str(rw_data['pathTestFile']), "r") as inputFh:
            PdbxReader(inputFh).read(dataList)

        symmetryCat = dataList[0].getObj('symmetry')
        print("----attribute list %r\n" % symmetryCat.getAttributeList())
        print("----ROW %r\n" % symmetryCat.getRow(0))

        # Write the parsed content back out unchanged.
        with open(str(rw_data['pathOutputFile2']), "w") as outputFh:
            PdbxWriter(outputFh).write(dataList)

        assert len(dataList) == 1
Example #5
    def _testmerge(self, pathin):
        """Read the merged file at *pathin* and verify merge/replace results."""
        with open(pathin, "r") as ifh:
            reader = PdbxReader(ifh)
            containerList = []
            reader.read(containerList)
            # The merged file carries exactly two data blocks.
            self.assertEqual(len(containerList), 2, "Two blocks in merge")

            block = containerList[0]

            # Merged struct category: single row mixing kept, overridden and
            # newly-added items.
            cat = block.getObj('struct')
            self.assertIsNotNone(cat, 'Missing struct category')
            self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
            expected = {'_struct.title': 'Start title',
                        '_struct.pdbx_descriptor': 'Override descriptor',
                        '_struct.new': 'Something to add'}
            self.assertEqual(cat.getRowItemDict(0), expected,
                             'struct category mismatch')

            # Merged exptl category.
            cat = block.getObj('exptl')
            self.assertIsNotNone(cat, 'Missing exptl category')
            self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
            expected = {'_exptl.method': 'NEW', '_exptl.entry_id': 'something',
                        '_exptl.absorpt_coefficient_mu': '?',
                        '_exptl.details': 'some details'}
            self.assertEqual(cat.getRowItemDict(0), expected,
                             'exptl category mismatch')

            # Replacement of a category absent from the base block.
            cat = block.getObj('third')
            self.assertIsNotNone(cat, 'Missing third category')
            self.assertEqual(cat.getRowCount(), 3, "Should only have a single row")
            for rowIdx, (idVal, val) in enumerate([('1', 'a'), ('2', 'b')]):
                self.assertEqual(cat.getRowItemDict(rowIdx),
                                 {'_third.id': idVal, '_third.val': val},
                                 'third category mismatch')
Example #6
 def __read(self):
     """Read the file at self.__filePath, caching the container list, the
     first container and its block ID, and a name-to-index map.

     No-op when no file path is set; read failures are logged to
     self.__lfh rather than raised.
     """
     if not self.__filePath:
         return
     try:
         # with-block closes the handle even if parsing raises
         # (the original open/close pair leaked it on exception).
         with open(self.__filePath, "r") as ifh:
             pRd = PdbxReader(ifh)
             pRd.read(self.__dataList)
         if self.__dataList:
             self.__container = self.__dataList[0]
             self.__blockID = self.__container.getName()
             # Map each block name to its position in the list.
             for idx, container in enumerate(self.__dataList):
                 self.__dataMap[container.getName()] = idx
     except Exception as e:
         self.__lfh.write("Read %s failed %s.\n" %
                          (self.__filePath, str(e)))
Example #7
    def test_update_data_file(self, writer_paths):
        """Write a mapping category, read it back, update two columns, rewrite."""
        category = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id",
                              "auth_mon_num", "pdb_chain_id",
                              "ref_mon_id", "ref_mon_num"):
            category.appendAttribute(attributeName)
        for _ in range(4):
            category.append((1, 2, 3, 4, 5, 6, 7))

        container = DataContainer("myblock")
        container.append(category)
        dataList = [container]
        with open(str(writer_paths['pathOutputFile1']), "w") as ofh:
            PdbxWriter(ofh).write(dataList)
        #
        # Read and update the data -
        #
        dataList = []
        with open(str(writer_paths['pathOutputFile1']), "r") as ifh:
            PdbxReader(ifh).read(dataList)
        #
        mappingCat = dataList[0].getObj('pdbx_seqtool_mapping_ref')
        # Overwrite two columns in every row before rewriting.
        for iRow in range(0, mappingCat.getRowCount()):
            mappingCat.setValue('some value', 'ref_mon_id', iRow)
            mappingCat.setValue(100, 'ref_mon_num', iRow)
        with open(str(writer_paths['pathOutputFile2']), "w") as ofh:
            PdbxWriter(ofh).write(dataList)
        assert len(dataList) == 1
Example #8
    def test_update_data_file(self, rw_data):
        """Write a mapping category, read it back, update two columns, rewrite."""
        myDataList = []

        curContainer = DataContainer("myblock")
        aCat = DataCategory("pdbx_seqtool_mapping_ref")
        for attributeName in ("ordinal", "entity_id", "auth_mon_id",
                              "auth_mon_num", "pdb_chain_id",
                              "ref_mon_id", "ref_mon_num"):
            aCat.appendAttribute(attributeName)
        # Four rows with distinct ordinals, constant values elsewhere.
        for ordinal in (9, 10, 11, 12):
            aCat.append([ordinal, 2, 3, 4, 5, 6, 7])

        curContainer.append(aCat)
        myDataList.append(curContainer)
        # with-blocks replace the bare open/close pairs that leaked the
        # handles when the writer/reader raised.
        with open(str(rw_data['pathOutputFile1']), "w") as ofh:
            PdbxWriter(ofh).write(myDataList)

        myDataList = []
        with open(str(rw_data['pathOutputFile1']), "r") as ifh:
            PdbxReader(ifh).read(myDataList)
        myBlock = myDataList[0]
        myCat = myBlock.getObj('pdbx_seqtool_mapping_ref')
        for iRow in range(0, myCat.getRowCount()):
            myCat.setValue('some value', 'ref_mon_id', iRow)
            myCat.setValue(100, 'ref_mon_num', iRow)

        with open(str(rw_data['pathOutputFile2']), "w") as ofh:
            PdbxWriter(ofh).write(myDataList)

        assert len(myDataList) == 1
Example #9
    def __testReaders(self, fPath):
        """Tests python and IoAdapter readers and checks values"""
        # Pure-python reader.
        pyContainers = []
        with open(fPath, "r") as ifh:
            PdbxReader(ifh).read(pyContainers)

        self.__testValues(pyContainers)

        # C++ IoAdapter reader.
        try:
            adapter = IoAdapter(raiseExceptions=True)
            cxxContainers = adapter.readFile(fPath, outDirPath=self.__pathOutputDir)
            logger.debug("Read %d data blocks", len(cxxContainers))
            self.assertEqual(len(cxxContainers), 1)
        except Exception as e:
            logger.error("Failing with %s", str(e))
            self.fail()

        self.__testValues(cxxContainers)
Example #10
    def test_row_list_initialization(self, rw_data):
        """Initialize a category from a row list, round-trip it through a
        file, and verify a single container is recovered."""
        fn = rw_data['pathOutputFile4']
        attributeNameList = ['aOne', 'aTwo', 'aThree', 'aFour', 'aFive',
                             'aSix', 'aSeven', 'aEight', 'aNine', 'aTen']
        # Ten independent rows of 1..10 (comprehension replaces the
        # copy-pasted literal rows; each row is a distinct list object).
        rowList = [list(range(1, 11)) for _ in range(10)]
        nameCat = 'myCategory'

        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        curContainer.append(aCat)

        myContainerList = [curContainer]
        # with-blocks close the handles even when the writer/reader raises
        # (the original open/close pairs leaked them on exception).
        with open(str(fn), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(str(fn), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                print("Recovered data category  %s\n" % name)
                print("Attribute list           %r\n" % repr(aList))
                print("Row list                 %r\n" % repr(rList))
        assert len(myContainerList) == 1
Example #11
    def test_row_dict_initialization(self, rw_data):
        """Initialize a category from dict rows, append/extend, rename,
        round-trip through a file and verify the recovered row count."""
        rLen = 10
        fn = rw_data['pathOutputFile5']
        attributeNameList = ['a', 'b', 'c', 'd']
        rowList = [{'a': 1, 'b': 2, 'c': 3, 'd': 4} for i in range(rLen)]
        nameCat = 'myCategory'
        #
        # Build rLen rows, append two more, then extend with rLen again:
        # final row count is 2 * rLen + 2.
        curContainer = DataContainer("myblock")
        aCat = DataCategory(nameCat, attributeNameList, rowList)
        aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
        aCat.append({'a': 1, 'b': 2, 'c': 3, 'd': 4})
        aCat.extend(rowList)
        curContainer.append(aCat)
        aCat.renameAttributes({'a': 'aa', 'b': 'bb', 'c': 'cc', 'd': 'dd'})
        aCat.setName('renamedCategory')
        #
        # with-blocks close the handles even when the writer/reader raises
        # (the original open/close pairs leaked them on exception).
        myContainerList = [curContainer]
        with open(str(fn), "w") as ofh:
            pdbxW = PdbxWriter(ofh)
            pdbxW.write(myContainerList)

        myContainerList = []
        with open(str(fn), "r") as ifh:
            pRd = PdbxReader(ifh)
            pRd.read(myContainerList)
        for container in myContainerList:
            for objName in container.getObjNameList():
                name, aList, rList = container.getObj(objName).get()
                print("Recovered data category  %s\n" % name)
                print("Attribute list           %r\n" % repr(aList))
                print("Row list                 %r\n" % repr(rList))
        assert len(myContainerList) == 1
        assert len(rList) == 2 * rLen + 2
Example #12
    def _testmerge(self, pathin):
        """Read the merged file at *pathin* and verify merge/replace results
        across the struct, exptl and third categories of the first block."""
        with open(pathin, "r") as ifh:
            pdbxr = PdbxReader(ifh)
            dlist = []
            pdbxr.read(dlist)
            # Two blocks
            self.assertEqual(len(dlist), 2, "Two blocks in merge")

            block = dlist[0]

            # Merge: struct row mixes kept, overridden and newly-added items.
            cat = block.getObj("struct")
            self.assertIsNotNone(cat, "Missing struct category")
            self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
            rd = cat.getRowItemDict(0)
            # print(rd)
            self.assertEqual(rd, {"_struct.title": "Start title", "_struct.pdbx_descriptor": "Override descriptor", "_struct.new": "Something to add"}, "struct category mismatch")

            # Merge: exptl row keeps '?' placeholder for an unset item.
            cat = block.getObj("exptl")
            self.assertIsNotNone(cat, "Missing exptl category")
            self.assertEqual(cat.getRowCount(), 1, "Should only have a single row")
            rd = cat.getRowItemDict(0)
            # print(rd)
            self.assertEqual(
                rd, {"_exptl.method": "NEW", "_exptl.entry_id": "something", "_exptl.absorpt_coefficient_mu": "?", "_exptl.details": "some details"}, "exptl category mismatch"
            )

            # Replace category non-existant
            cat = block.getObj("third")
            self.assertIsNotNone(cat, "Missing third category")
            self.assertEqual(cat.getRowCount(), 3, "Should only have a single row")
            rd = cat.getRowItemDict(0)
            self.assertEqual(rd, {"_third.id": "1", "_third.val": "a"}, "third category mismatch")
            rd = cat.getRowItemDict(1)
            self.assertEqual(rd, {"_third.id": "2", "_third.val": "b"}, "third category mismatch")
Example #13
    def readFile(self,
                 inputFilePath,
                 enforceAscii=False,
                 selectList=None,
                 excludeFlag=False,
                 logFilePath=None,
                 outDirPath=None,
                 cleanUp=False,
                 **kwargs):
        """Parse the data blocks in the input mmCIF format data file into list of data or definition containers.  The data category content within
            each data block is stored a collection of DataCategory objects within each container.

        Args:
            inputFilePath (string): Input file path
            enforceAscii (bool, optional): Flag to requiring ASCII encoding. See encoding error options.
            selectList (List, optional):  List of data category names to be extracted or excluded from the input file (default: select/extract)
            excludeFlag (bool, optional): Flag to indicate selectList should be treated as an exclusion list
            logFilePath (string, optional): Log file path (if not provided this will be derived from the input file.)
            outDirPath (string, optional): Path for translated/re-encoded files and default logfiles.
            cleanUp (bool, optional): Flag to automatically remove logs and temporary files on exit.
            **kwargs: Placeholder for missing keyword arguments.

        Returns:
            List of DataContainers: Contents of input file parsed into a list of DataContainer objects.

        """
        # Unknown keyword arguments are tolerated but reported.
        if kwargs:
            logger.warning("Unsupported keyword arguments %s", kwargs.keys())
        filePath = str(inputFilePath)
        # oPath = outDirPath if outDirPath else '.'
        oPath = self._chooseTemporaryPath(inputFilePath, outDirPath=outDirPath)
        containerList = []
        if enforceAscii:
            encoding = "ascii"
        else:
            encoding = "utf-8"
        try:
            #
            # Derive a default parser-log path when none was supplied.
            lPath = logFilePath
            if not lPath:
                lPath = self._getDefaultFileName(filePath,
                                                 fileType="cif-parser-log",
                                                 outDirPath=oPath)
            #
            self._setLogFilePath(lPath)
            # ---
            # A missing local file is not an error here: return an empty list.
            if self.__isLocal(filePath) and not self._fileExists(filePath):
                return []
            #
            # Python 3 path: open text with an explicit encoding/error policy;
            # remote paths are streamed line-by-line via requests.
            if sys.version_info[0] > 2:
                if self.__isLocal(filePath):
                    # Transparently uncompress into the working dir first.
                    filePath = self._uncompress(filePath, oPath)
                    with open(filePath,
                              "r",
                              encoding=encoding,
                              errors=self._readEncodingErrors) as ifh:
                        pRd = PdbxReader(ifh)
                        pRd.read(containerList,
                                 selectList,
                                 excludeFlag=excludeFlag)
                else:
                    with closing(requests.get(filePath)) as ifh:
                        # Decode each streamed line; the reader accepts any
                        # line iterator in place of a file handle.
                        it = (line.decode(encoding)
                              for line in ifh.iter_lines())
                        pRd = PdbxReader(it)
                        pRd.read(containerList,
                                 selectList,
                                 excludeFlag=excludeFlag)
            # Python 2 path: io.open only when ASCII must be enforced.
            else:
                if self.__isLocal(filePath):
                    filePath = self._uncompress(filePath, oPath)
                    if enforceAscii:
                        with io.open(filePath,
                                     "r",
                                     encoding=encoding,
                                     errors=self._readEncodingErrors) as ifh:
                            pRd = PdbxReader(ifh)
                            pRd.read(containerList,
                                     selectList,
                                     excludeFlag=excludeFlag)
                    else:
                        with open(filePath, "r") as ifh:
                            pRd = PdbxReader(ifh)
                            pRd.read(containerList,
                                     selectList,
                                     excludeFlag=excludeFlag)
                else:
                    with closing(requests.get(filePath)) as ifh:
                        it = (line.decode(encoding)
                              for line in ifh.iter_lines())
                        pRd = PdbxReader(it)
                        pRd.read(containerList,
                                 selectList,
                                 excludeFlag=excludeFlag)
            if cleanUp:
                # Second cleanup removes the uncompressed copy only when it
                # differs from the caller's original path.
                self._cleanupFile(lPath, lPath)
                self._cleanupFile(filePath != str(inputFilePath), filePath)
            self._setContainerProperties(containerList,
                                         locator=str(inputFilePath),
                                         load_date=self._getTimeStamp(),
                                         uid=uuid.uuid4().hex)
        # Parser errors: log, optionally clean up, and re-raise without
        # chaining when configured to raise.
        except (PdbxError, PdbxSyntaxError) as ex:
            msg = "File %r with %s" % (filePath, str(ex))
            self._appendToLog([msg])
            self._cleanupFile(lPath and cleanUp, lPath)
            if self._raiseExceptions:
                raise_from(ex, None)
                # raise ex from None
        except Exception as e:
            msg = "File %r with %s" % (filePath, str(e))
            self._appendToLog([msg])
            self._cleanupFile(lPath and cleanUp, lPath)
            if self._raiseExceptions:
                raise e
            else:
                logger.error("Failing read for %s with %s", filePath, str(e))
        return containerList
Example #14
    def __readSchemaMap(self, schemaMapFile):
        """Read RCSB schema map file and return the list of table names, attribute definitions,
        attribute mapping, table and attribute abbreviations.

        Returns:
            tuple: (tableNameList, atDefList, atMapList, tableAbbrevD, attribAbbrevD);
                all empty on failure (errors are logged, not raised).
        """
        tableNameList = []
        atDefList = []
        atMapList = []
        tableAbbrevD = {}
        attribAbbrevD = {}
        try:
            myContainerList = []
            # with-block closes the handle even if parsing raises (the
            # original open/close pair leaked it on exception).
            with open(schemaMapFile, "r", encoding="utf-8") as ifh:
                pRd = PdbxReader(ifh)
                pRd.read(myContainerList)
            #
            for myContainer in myContainerList:
                cN = str(myContainer.getName()).lower()
                #
                # read schema details --
                #
                if cN == "rcsb_schema":
                    #
                    catObj = myContainer.getObj("rcsb_table")
                    if catObj is not None:
                        i1 = catObj.getAttributeIndex("table_name")
                        for row in catObj.getRowList():
                            tableNameList.append(row[i1])
                    #
                    # Attribute definitions: one dict per row keyed by atList.
                    catObj = myContainer.getObj("rcsb_attribute_def")
                    atList = [
                        "table_name", "attribute_name", "data_type",
                        "index_flag", "null_flag", "width", "precision",
                        "populated"
                    ]
                    if catObj is not None:
                        indList = [catObj.getAttributeIndex(at) for at in atList]
                        for row in catObj.getRowList():
                            atDefList.append({at: row[indList[ii]]
                                              for ii, at in enumerate(atList)})
                    #
                    # _rcsb_table_abbrev.table_name
                    # _rcsb_table_abbrev.table_abbrev
                    #
                    catObj = myContainer.getObj("rcsb_table_abbrev")
                    if catObj is not None:
                        i1 = catObj.getAttributeIndex("table_name")
                        i2 = catObj.getAttributeIndex("table_abbrev")
                        for row in catObj.getRowList():
                            tableAbbrevD[row[i1]] = row[i2]
                    #
                    # _rcsb_attribute_abbrev.table_name
                    # _rcsb_attribute_abbrev.attribute_name
                    # _rcsb_attribute_abbrev.attribute_abbrev

                    catObj = myContainer.getObj("rcsb_attribute_abbrev")
                    if catObj is not None:
                        i1 = catObj.getAttributeIndex("table_name")
                        i2 = catObj.getAttributeIndex("attribute_name")
                        i3 = catObj.getAttributeIndex("attribute_abbrev")
                        for row in catObj.getRowList():
                            # Nested map: table_name -> attribute_name -> abbrev.
                            attribAbbrevD.setdefault(row[i1], {})[row[i2]] = row[i3]

                # read attribute mapping details --
                #
                elif cN == "rcsb_schema_map":
                    catObj = myContainer.getObj("rcsb_attribute_map")
                    atList = [
                        "target_table_name", "target_attribute_name",
                        "source_item_name", "condition_id", "function_id"
                    ]
                    if catObj is not None:
                        indList = [catObj.getAttributeIndex(at) for at in atList]
                        for row in catObj.getRowList():
                            atMapList.append({at: row[indList[ii]]
                                              for ii, at in enumerate(atList)})

                else:
                    logger.error("+ERROR -unanticipated data container %s", cN)

        except Exception as e:
            logger.error("+ERROR - error processing schema map file %s",
                         schemaMapFile)
            logger.exception("Failing with %s", str(e))

        return tableNameList, atDefList, atMapList, tableAbbrevD, attribAbbrevD