Пример #1
0
    def testEditAttributes(self):
        """Test case - read the attribute names and append a new attribute."""
        try:
            category = DataCategory("A", self.__attributeList, self.__rowListAsciiA)
            # A newly constructed category is expected to sit at row index 0
            # with no current attribute selected.
            self.assertEqual(0, category.getRowIndex())
            self.assertEqual(None, category.getCurrentAttribute())

            # The ordered attribute listing must have one entry per attribute.
            attrCount = len(category.getAttributeList())
            orderedAttrs = category.getAttributeListWithOrder()
            self.assertEqual(len(orderedAttrs), attrCount)

            # appendAttribute() is expected to return the new attribute count,
            # and the new column must read back as "?" in an existing row.
            attrCount = len(category.getAttributeList())
            self.assertEqual(category.appendAttribute("ColNew"), attrCount + 1)
            firstRow = category.getFullRow(0)
            self.assertEqual(firstRow[attrCount], "?")

            # Asking for a row index beyond the stored rows must yield a row
            # made up entirely of "?" values.
            beyondRow = category.getFullRow(category.getRowCount() + 1)
            for cellValue in beyondRow:
                self.assertEqual(cellValue, "?")
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #2
0
 def test_cond_select_values(self, category_data):
     """Check conditional selections on the category with missing values.

     Every attribute after the first is matched against the corresponding
     entry of the test row, and every attribute window of width 1 to 4 is
     selected where the last attribute equals the last test-row value.  In
     each case the number of results must equal the row count.

     Args:
         category_data: mapping that supplies 'attributeListMiss',
             'rowListUnicodeMiss' and 'testRowUnicodeMiss'
             (presumably a pytest fixture - confirm against conftest).
     """
     attr_names = category_data['attributeListMiss']
     test_row = category_data['testRowUnicodeMiss']
     category = DataCategory('A', attr_names,
                             category_data['rowListUnicodeMiss'])
     attributes = category.getAttributeList()
     for idx, attr in enumerate(attributes[1:]):
         matches = category.selectIndices(test_row[idx], attr)
         assert len(matches) == category.getRowCount()
     #
     print("Window %r" % list(window(attributes)))
     # Same check for each window width; only the width varies.
     for width in (1, 2, 3, 4):
         for attr_window in window(attributes, size=width):
             selected = category.selectValueListWhere(
                 attr_window, test_row[-1], attr_names[-1])
             assert len(selected) == category.getRowCount()
Пример #3
0
 def test_compare_values(self, category_data):
     """Compare attribute values between DataCategory instances.

     Three comparisons are made against the reference category:
     itself (every attribute must compare equal), the category built from
     the "Miss" data (only 'colC' and 'colD' may differ), and a copy in
     which a single 'colD' value has been overwritten (only 'colD' may
     differ).

     Args:
         category_data: mapping that supplies 'attributeList',
             'rowListUnicode', 'attributeListMiss' and 'rowListUnicodeMiss'
             (presumably a pytest fixture - confirm against conftest).
     """
     dcU = DataCategory('A', category_data['attributeList'],
                        category_data['rowListUnicode'])
     dcM = DataCategory('A', category_data['attributeListMiss'],
                        category_data['rowListUnicodeMiss'])
     na = dcU.getAttributeList()
     assert len(na) >= 1

     # Each tuple is read as (attribute name, comparison result).
     for tup in dcU.cmpAttributeValues(dcU):
         assert tup[1] == True  # noqa: E712 - equality semantics kept on purpose

     for tup in dcU.cmpAttributeValues(dcM):
         expected = tup[0] not in ['colC', 'colD']
         assert tup[1] == expected
     #
     dcX = DataCategory('A', category_data['attributeList'],
                        category_data['rowListUnicode'])
     # The replacement value had been damaged by a bad re-encoding (U+FFFD
     # replacement characters); it is restored to the intended text.
     assert dcX.setValue(
         u'134ĆćĈĉĊċČčĎďĐđĒēĠġĢģĤĥĦħĨxyz',
         attributeName='colD',
         rowIndex=dcX.getRowCount() - 2)
     for tup in dcU.cmpAttributeValues(dcX):
         expected = tup[0] not in ['colD']
         assert tup[1] == expected
Пример #4
0
 def test_edit_extend(self, category_data):
     """Check that appendAttributeExtendRows() adds a '?'-filled column.

     The call is expected to return the new attribute count, and the last
     row must hold "?" in the position of the new attribute.
     """
     category = DataCategory('A', category_data['attributeList'],
                             category_data['rowListAsciiA'])
     attr_count = len(category.getAttributeList())
     new_count = category.appendAttributeExtendRows('colNew')
     assert new_count == attr_count + 1
     last_row = category.getRow(category.getRowCount() - 1)
     assert last_row[attr_count] == "?"
Пример #5
0
    def testCompareValues(self):
        """Test case - compare attribute values between category objects."""
        try:
            reference = DataCategory("A", self.__attributeList,
                                     self.__rowListUnicode)
            withMissing = DataCategory("A", self.__attributeListMiss,
                                       self.__rowListUnicodeMiss)
            attrNames = reference.getAttributeList()
            self.assertGreaterEqual(len(attrNames), 1)

            # Compared with itself, every attribute must be reported equal.
            for cmpTup in reference.cmpAttributeValues(reference):
                self.assertEqual(cmpTup[1], True)

            # Against the "Miss" data only colC and colD may differ.
            for cmpTup in reference.cmpAttributeValues(withMissing):
                expected = cmpTup[0] not in ["colC", "colD"]
                self.assertEqual(cmpTup[1], expected)

            # Overwrite one colD value in a fresh copy; only colD may differ.
            edited = DataCategory("A", self.__attributeList,
                                  self.__rowListUnicode)
            updated = edited.setValue(u"134ĆćĈĉĊċČčĎďĐđĒēĠġĢģĤĥĦħĨxyz",
                                      attributeName="colD",
                                      rowIndex=edited.getRowCount() - 2)
            self.assertTrue(updated)
            for cmpTup in reference.cmpAttributeValues(edited):
                expected = cmpTup[0] not in ["colD"]
                self.assertEqual(cmpTup[1], expected)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #6
0
    def testGetValues(self):
        """Test case - exercise getValue() and getValueOrDefault()."""
        try:
            category = DataCategory("A", self.__attributeList,
                                    self.__rowListUnicode)
            attrNames = category.getAttributeList()
            logger.debug("Row length %r", category.getRowCount())
            # Test-row entry ii is checked against attribute ii + 1 in
            # every row of the category.
            for ii, expected in enumerate(self.__testRowUnicode):
                attrName = attrNames[ii + 1]
                for rowIdx in range(category.getRowCount()):
                    logger.debug("ii %d j %d at %s val %r ", ii, rowIdx,
                                 attrName, expected)
                    self.assertEqual(category.getValue(attrName, rowIdx),
                                     expected)
                    self.assertEqual(
                        category.getValueOrDefault(attrName, rowIdx,
                                                   "mydefault"), expected)

            # A negative row index is expected to count from the end.
            self.assertEqual(
                category.getValueOrDefault("colOrd", -1, "default"), 9)

            # Out-of-range rows and unknown attributes must raise.
            pastEnd = category.getRowCount() + 1
            with self.assertRaises(IndexError):
                category.getValue("colOrd", pastEnd)
            with self.assertRaises(ValueError):
                category.getValue("badAtt", 0)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #7
0
    def test_edit_attributes(self, category_data):
        """Check attribute listing and appending on an ASCII category.

        Args:
            category_data: mapping that supplies 'attributeList' and
                'rowListAsciiA' (presumably a pytest fixture - confirm).
        """
        category = DataCategory('A', category_data['attributeList'],
                                category_data['rowListAsciiA'])
        # Initial cursor state expected by the test.
        assert 0 == category.getRowIndex()
        assert None == category.getCurrentAttribute()

        # The ordered listing must have one entry per attribute.
        attr_count = len(category.getAttributeList())
        ordered_attrs = category.getAttributeListWithOrder()
        assert len(ordered_attrs) == attr_count

        # Appending is expected to return the new attribute count and to
        # expose "?" for the new column in an existing row.
        attr_count = len(category.getAttributeList())
        assert category.appendAttribute("ColNew") == attr_count + 1
        first_row = category.getFullRow(0)
        assert first_row[attr_count] == "?"

        # A row index past the stored rows must come back as all "?".
        beyond_row = category.getFullRow(category.getRowCount() + 1)
        for cell in beyond_row:
            assert cell == "?"
Пример #8
0
 def testEditExtend(self):
     """Test case - append an attribute and extend the existing rows."""
     try:
         category = DataCategory("A", self.__attributeList,
                                 self.__rowListAsciiA)
         attrCount = len(category.getAttributeList())
         # The call is expected to return the new attribute count.
         newCount = category.appendAttributeExtendRows("colNew")
         self.assertEqual(newCount, attrCount + 1)
         # The last row must hold "?" in the new column.
         lastRow = category.getRow(category.getRowCount() - 1)
         self.assertEqual(lastRow[attrCount], "?")
     except Exception as err:
         logger.exception("Failing with %s", str(err))
         self.fail()
Пример #9
0
 def test_compare_attributes(self, category_data):
     """Compare attribute name sets between DataCategory instances.

     Args:
         category_data: mapping that supplies 'attributeList',
             'rowListUnicode', 'attributeListMiss' and 'rowListUnicodeMiss'
             (presumably a pytest fixture - confirm against conftest).
     """
     reference = DataCategory('A', category_data['attributeList'],
                              category_data['rowListUnicode'])
     with_missing = DataCategory('A', category_data['attributeListMiss'],
                                 category_data['rowListUnicodeMiss'])
     attr_count = len(reference.getAttributeList())

     # NOTE(review): the three results look like (only in self, shared,
     # only in other) - confirm against DataCategory.cmpAttributeNames.
     first, second, third = reference.cmpAttributeNames(reference)
     assert (len(first), len(third)) == (0, 0)
     assert len(second) == attr_count

     first, second, third = reference.cmpAttributeNames(with_missing)
     assert (len(first), len(third)) == (0, 3)
     assert len(second) == attr_count
Пример #10
0
    def test_get_values(self, category_data):
        """Exercise getValue() and getValueOrDefault() on Unicode rows.

        Args:
            category_data: mapping that supplies 'attributeList',
                'rowListUnicode' and 'testRowUnicode'
                (presumably a pytest fixture - confirm against conftest).
        """
        category = DataCategory('A', category_data['attributeList'],
                                category_data['rowListUnicode'])
        attr_names = category.getAttributeList()
        print("Row length %r " % category.getRowCount())
        # Test-row entry ii is checked against attribute ii + 1 in every row.
        for ii, expected in enumerate(category_data['testRowUnicode']):
            attr = attr_names[ii + 1]
            for row_idx in range(category.getRowCount()):
                print("ii %d j %d at %s val %r " %
                      (ii, row_idx, attr, expected))
                assert category.getValue(attr, row_idx) == expected
                fallback = category.getValueOrDefault(attr, row_idx,
                                                      'mydefault')
                assert fallback == expected

        # A negative row index is expected to count from the end.
        assert category.getValueOrDefault('colOrd', -1, 'default') == 9

        # Out-of-range rows and unknown attributes must raise.
        with pytest.raises(IndexError):
            category.getValueOrDefault('colOrd', category.getRowCount() + 1, 0)
        with pytest.raises(ValueError):
            category.getValueOrDefault('badAtt', 0, 0)
Пример #11
0
 def testCompareAttributes(self):
     """Test case - compare attribute name sets between category objects."""
     try:
         reference = DataCategory("A", self.__attributeList,
                                  self.__rowListUnicode)
         withMissing = DataCategory("A", self.__attributeListMiss,
                                    self.__rowListUnicodeMiss)
         attrCount = len(reference.getAttributeList())

         # NOTE(review): the three results look like (only in self, shared,
         # only in other) - confirm against DataCategory.cmpAttributeNames.
         first, second, third = reference.cmpAttributeNames(reference)
         self.assertEqual(len(first), 0)
         self.assertEqual(len(third), 0)
         self.assertEqual(len(second), attrCount)

         first, second, third = reference.cmpAttributeNames(withMissing)
         self.assertEqual(len(first), 0)
         self.assertEqual(len(third), 3)
         self.assertEqual(len(second), attrCount)
     except Exception as err:
         logger.exception("Failing with %s", str(err))
         self.fail()
Пример #12
0
    def testCondSelectValues(self):
        """Test case - conditional selections on a category with missing values."""
        try:
            category = DataCategory("A", self.__attributeListMiss,
                                    self.__rowListUnicodeMiss)
            attrNames = category.getAttributeList()
            # Each attribute after the first is matched against the
            # corresponding test-row entry; every row must be selected.
            for pos, attrName in enumerate(attrNames[1:]):
                hits = category.selectIndices(self.__testRowUnicodeMiss[pos],
                                              attrName)
                self.assertEqual(len(hits), category.getRowCount())
            #
            logger.debug("Window %r", list(window(attrNames)))
            # Same check for each window width; only the width varies.
            for width in (1, 2, 3, 4):
                for attrWindow in window(attrNames, size=width):
                    picked = category.selectValueListWhere(
                        attrWindow, self.__testRowUnicodeMiss[-1],
                        self.__attributeListMiss[-1])
                    self.assertEqual(len(picked), category.getRowCount())
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #13
0
def process_entry(file_in, file_out):
    """Add centre-of-mass items to the ``struct`` category of an mmCIF file.

    The input is read twice: once with Gemmi, whose first data block is
    handed to get_center_of_mass(), and once with IoAdapterCore, whose
    containers are edited and written to ``file_out``.  Only the first
    block of the file is updated.  If the block has no ``struct`` category,
    one is created with ``entry_id`` taken from ``entry.id`` ("XXXX" when
    that is not available).

    Args:
        file_in: path of the mmCIF file to read.
        file_out: path the updated mmCIF file is written to.

    Returns:
        0 on success; 1 if reading, the centre-of-mass calculation or
        writing fails (the failure is logged, not raised).
    """
    try:
        cif_file = gemmi.cif.read(file_in)  # pylint: disable=no-member
        data_block = cif_file[0]
    except Exception as e:
        logger.error("Failed to read cif file in Gemmi")
        logger.error(e)
        return 1

    # Use the module logger rather than the root logger so these messages
    # follow the same configuration as the rest of the function.
    logger.info("Finding Centre of Mass")
    com = get_center_of_mass(data_block)
    if not com:
        return 1

    try:
        adapter = IoAdapterCore()
        ccL = adapter.readFile(file_in)
    except Exception as e:
        logger.error("Failed to read cif file using IoAdapterCore %s", e)
        return 1

    # Also covers a None result, which len() would have choked on.
    if not ccL:
        logger.error("No data parsed from file")
        return 1

    # First block only
    b0 = ccL[0]

    obj = b0.getObj("struct")
    if obj is None:
        # The category does not exist: create it, keyed by entry.id.
        eid = "XXXX"
        eobj = b0.getObj("entry")
        if eobj and "id" in eobj.getAttributeList():
            eid = eobj.getValue("id", 0)
        obj = DataCategory("struct")
        obj.appendAttribute("entry_id")
        obj.setValue(eid, "entry_id", 0)
        b0.append(obj)

    new_items = (
        ("pdbx_center_of_mass_x", com.x),
        ("pdbx_center_of_mass_y", com.y),
        ("pdbx_center_of_mass_z", com.z),
    )
    for item_name, value in new_items:
        if item_name not in obj.getAttributeList():
            obj.appendAttribute(item_name)
        obj.setValue(str(value), item_name, 0)

    try:
        logger.info("Writing mmcif file: %s", file_out)
        ret = adapter.writeFile(file_out, ccL)
    except Exception as e:
        logger.error("Failed to write ccif file in IoAdapater %s", e)
        return 1

    if not ret:
        # A falsy return from writeFile() is a failure, so log it as one.
        logger.error("Writing failed error %s", ret)
        return 1

    return 0
Пример #14
0
    def __parser(self,
                 tokenizer,
                 containerList,
                 categorySelectionD=None,
                 excludeFlag=False):
        """Parse a token stream from a PDBx data file or dictionary.

        The method is a small state machine driven by the tokens it pulls
        from ``tokenizer``.  Leading tokens are skipped until the first
        reserved word; after that each cycle handles one of: an item-value
        pair (``_category.attribute value``), a ``loop_`` table, a save
        frame (definition), a data block, a global block or a stop word.

        Args:
            tokenizer: iterator consumed with ``next()``; each token is
                unpacked here as a 4-tuple
                (category name, attribute name, quoted string, word).
            containerList: list that receives every DataContainer and
                DefinitionContainer created while parsing (appended in place).
            categorySelectionD: optional collection of category names, tested
                with ``in``; ``None`` is treated as an empty selection, in
                which case every category is kept.
            excludeFlag: when False, only categories named in the selection
                are added to their container and parsing returns early once
                ``__allSelected()`` reports true; when True, categories named
                in the selection are the ones left out.

        Returns:
            None.  Results are delivered through ``containerList``.

        Note:
            ``next(tokenizer)`` is never guarded here, so exhaustion of the
            token stream propagates to the caller (StopIteration for a plain
            iterator); that appears to be the normal way out of the main
            loop - confirm against the calling code.
        """
        catSelectD = categorySelectionD if categorySelectionD is not None else {}
        logger.debug("Exclude Flag %r Category selection %r", excludeFlag,
                     catSelectD)
        # Working container - data or definition
        curContainer = None
        # the last container of type data -
        previousDataContainer = None
        #
        # Working category container
        # categoryIndex maps category name -> DataCategory for the categories
        # built from item-value pairs in the current container.
        categoryIndex = {}
        curCategory = None
        #
        curRow = None
        state = None

        # Find the first reserved word and begin capturing data.
        # Tokens without a word component are skipped.
        while True:
            curCatName, curAttName, curQuotedString, curWord = next(tokenizer)
            if curWord is None:
                continue
            reservedWord, state = self.__getState(curWord)
            if reservedWord is not None:
                break

        while True:
            #
            #  Set the current state  -
            #
            #  At this point in the processing cycle we are expecting a token containing
            #  either a '_category.attribute'  or a reserved word.
            #
            if curCatName is not None:
                state = "ST_KEY_VALUE_PAIR"
            elif curWord is not None:
                reservedWord, state = self.__getState(curWord)
            else:
                self.__syntaxError("Miscellaneous syntax error")
                return

            #
            # Process  _category.attribute  value assignments
            #
            if state == "ST_KEY_VALUE_PAIR":
                try:
                    curCategory = categoryIndex[curCatName]
                except KeyError:
                    # A new category is encountered - create a container and add a row
                    curCategory = categoryIndex[curCatName] = DataCategory(
                        curCatName)
                    #
                    #  check if we have all of the selection
                    if not excludeFlag and self.__allSelected(
                            curContainer, catSelectD):
                        return
                    try:
                        # With a non-empty selection the category is attached
                        # to the container only if it passes the
                        # include/exclude test; it is still populated below
                        # either way.
                        if catSelectD:
                            if not excludeFlag and curCatName in catSelectD:
                                curContainer.append(curCategory)
                            elif excludeFlag and curCatName not in catSelectD:
                                curContainer.append(curCategory)
                            else:
                                logger.debug(
                                    "Skipped unselected/excluded category %s",
                                    curCatName)
                        else:
                            curContainer.append(curCategory)
                    except AttributeError:
                        # Raised e.g. when curContainer is still None, i.e. no
                        # data_ block or save_ frame has been opened yet.
                        self.__syntaxError(
                            "Category cannot be added to  data_ block")
                        return

                    curRow = []
                    curCategory.append(curRow)
                else:
                    # Recover the existing row from the category
                    try:
                        # curRow = curCategory[0]
                        curRow = curCategory.getRow(0)
                    except IndexError:
                        self.__syntaxError(
                            "Internal index error accessing category data")
                        return

                # Check for duplicate attributes and add attribute to table.
                if curAttName in curCategory.getAttributeList():
                    self.__syntaxError(
                        "Duplicate attribute encountered in category")
                    return
                else:
                    curCategory.appendAttribute(curAttName)

                # Get the data for this attribute from the next token
                tCat, _, curQuotedString, curWord = next(tokenizer)

                # NOTE(review): the syntax errors reported in this section are
                # not followed by a return, so processing continues unless
                # __syntaxError() raises - confirm its behaviour.
                if tCat is not None or (curQuotedString is None
                                        and curWord is None):
                    self.__syntaxError("Missing data for item _%s.%s" %
                                       (curCatName, curAttName))

                if curWord is not None:
                    #
                    # Validation check token for misplaced reserved words  -
                    #
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        self.__syntaxError("Unexpected reserved word: %s" %
                                           (reservedWord))

                    curRow.append(curWord)

                elif curQuotedString is not None:
                    curRow.append(curQuotedString)

                else:
                    self.__syntaxError("Missing value in item-value pair")

                # Fetch the token that starts the next processing cycle.
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)
                continue

            #
            # Process a loop_ declaration and associated data -
            #
            elif state == "ST_TABLE":

                # The category name in the next curCatName,curAttName pair
                #    defines the name of the category container.
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

                if curCatName is None or curAttName is None:
                    self.__syntaxError("Unexpected token in loop_ declaration")
                    return

                # Check for a previous category declaration.
                if curCatName in categoryIndex:
                    self.__syntaxError(
                        "Duplicate category declaration in loop_")
                    return

                # Note: loop_ categories are not recorded in categoryIndex.
                curCategory = DataCategory(curCatName)

                #
                #  check if we have all of the selection
                if not excludeFlag and self.__allSelected(
                        curContainer, catSelectD):
                    return
                try:
                    # Same include/exclude test as for item-value pairs.
                    if catSelectD:
                        if not excludeFlag and curCatName in catSelectD:
                            curContainer.append(curCategory)
                        elif excludeFlag and curCatName not in catSelectD:
                            curContainer.append(curCategory)
                        else:
                            logger.debug(
                                "Skipped unselected/excluded category %s",
                                curCatName)
                    else:
                        curContainer.append(curCategory)
                except AttributeError:
                    self.__syntaxError(
                        "loop_ declaration outside of data_ block or save_ frame"
                    )
                    return

                curCategory.appendAttribute(curAttName)

                # Read the rest of the loop_ declaration
                # (item names continue until the first token that carries no
                # category name, which is the first data value).
                while True:
                    curCatName, curAttName, curQuotedString, curWord = next(
                        tokenizer)

                    if curCatName is None:
                        break

                    if curCatName != curCategory.getName():
                        self.__syntaxError(
                            "Changed category name in loop_ declaration")
                        return

                    curCategory.appendAttribute(curAttName)

                # If the next token is a 'word', check it for any reserved words -
                if curWord is not None:
                    reservedWord, state = self.__getState(curWord)
                    if reservedWord is not None:
                        if reservedWord == "stop":
                            return
                        else:
                            self.__syntaxError(
                                "Unexpected reserved word after loop declaration: %s"
                                % (reservedWord))

                # Read the table of data for this loop_ -
                # One row is filled per pass, one value per declared attribute;
                # the token already in hand supplies the first value.
                while True:
                    curRow = []
                    curCategory.append(curRow)

                    for _ in curCategory.getAttributeList():
                        if curWord is not None:
                            curRow.append(curWord)
                        elif curQuotedString is not None:
                            curRow.append(curQuotedString)

                        curCatName, curAttName, curQuotedString, curWord = next(
                            tokenizer)

                    # loop_ data processing ends if -

                    # A new _category.attribute is encountered
                    if curCatName is not None:
                        break

                    # A reserved word is encountered
                    if curWord is not None:
                        reservedWord, state = self.__getState(curWord)
                        if reservedWord is not None:
                            break

                continue

            elif state == "ST_DEFINITION":
                # Ignore trailing unnamed saveframe delimiters e.g. 'save'
                sName = self.__getContainerName(curWord)
                if sName:
                    # Named save frame: open a new definition container with
                    # a fresh category index.
                    curContainer = DefinitionContainer(sName)
                    containerList.append(curContainer)
                    categoryIndex = {}
                    curCategory = None
                else:
                    # reset current container to the last data container
                    curContainer = previousDataContainer

                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

            elif state == "ST_DATA_CONTAINER":
                # Open a new data block; blocks without a name are labelled
                # "unidentified".
                dName = self.__getContainerName(curWord)
                if not dName:
                    dName = "unidentified"
                curContainer = DataContainer(dName)
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                previousDataContainer = curContainer
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

            elif state == "ST_STOP":
                ###
                # curCatName, curAttName, curQuotedString, curWord = tokenizer.next()
                # NOTE(review): no token is consumed before looping again, so
                # the same curWord is re-examined at the top of the loop; this
                # looks like it could spin indefinitely if this state is ever
                # reached - confirm.
                continue

            elif state == "ST_GLOBAL":
                # Open a global block under the fixed name "blank-global".
                curContainer = DataContainer("blank-global")
                curContainer.setGlobal()
                containerList.append(curContainer)
                categoryIndex = {}
                curCategory = None
                curCatName, curAttName, curQuotedString, curWord = next(
                    tokenizer)

            elif state == "ST_UNKNOWN":
                self.__syntaxError("Unrecogized syntax element: " +
                                   str(curWord))
                return