def testReplaceValues(self):
    """Test case - replace values

    For attributes 3 through 6 of the "missing values" fixture, replace the
    fixture value with "newVal" and then rewrite that text to "nextVal".
    Each call is expected to report a count equal to the row count.
    """
    try:
        category = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        # The fixture value index trails the attribute index by one.
        for attIndex in range(3, 7):
            attName = self.__attributeListMiss[attIndex]
            oldValue = self.__testRowUnicodeMiss[attIndex - 1]
            replaced = category.replaceValue(oldValue, "newVal", attName)
            self.assertEqual(replaced, category.getRowCount())
        for attIndex in range(3, 7):
            attName = self.__attributeListMiss[attIndex]
            replaced = category.replaceSubstring("newVal", "nextVal", attName)
            self.assertEqual(replaced, category.getRowCount())
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testGetValues(self):
    """Test case - value getters

    Reads every fixture value back through getValue() and
    getValueOrDefault(), then checks negative indexing and the exceptions
    raised for an out-of-range row and an unknown attribute.
    """
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        attNames = category.getAttributeList()
        logger.debug("Row length %r", category.getRowCount())
        for pos, expected in enumerate(self.__testRowUnicode):
            # Fixture values line up with the attributes after the first one.
            attName = attNames[pos + 1]
            for rowIdx in range(category.getRowCount()):
                logger.debug("ii %d j %d at %s val %r ", pos, rowIdx, attName, expected)
                self.assertEqual(category.getValue(attName, rowIdx), expected)
                self.assertEqual(category.getValueOrDefault(attName, rowIdx, "mydefault"), expected)
        #
        # negative indices are interpreted in the python manner
        lastOrdinal = category.getValueOrDefault("colOrd", -1, "default")
        self.assertEqual(lastOrdinal, 9)
        self.assertRaises(IndexError, category.getValue, "colOrd", category.getRowCount() + 1)
        self.assertRaises(ValueError, category.getValue, "badAtt", 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_cond_select_values(self, category_data):
    """Check index selection and conditional value-list selection.

    Every fixture value is expected to match all rows of its attribute, and
    selecting any window of 1 to 4 attributes, conditioned on the last
    fixture value/attribute pair, is expected to return one entry per row.
    """
    attNames = category_data['attributeListMiss']
    rowValues = category_data['testRowUnicodeMiss']
    category = DataCategory('A', attNames, category_data['rowListUnicodeMiss'])
    atL = category.getAttributeList()
    for pos, attName in enumerate(atL[1:]):
        matches = category.selectIndices(rowValues[pos], attName)
        assert len(matches) == category.getRowCount()
    # Slide windows of increasing width over the attribute list.
    for width in range(1, 5):
        for attWindow in window(atL, size=width):
            selected = category.selectValueListWhere(attWindow, rowValues[-1], attNames[-1])
            assert len(selected) == category.getRowCount()
def test_edit_row_accessors(self, category_data):
    """Each row accessor must raise IndexError for a row index past the end."""
    category = DataCategory('A', category_data['attributeList'], category_data['rowListAsciiA'])
    accessors = (category.getRow, category.getRowAttributeDict, category.getRowItemDict)
    for accessor in accessors:
        with pytest.raises(IndexError):
            accessor(category.getRowCount() + 1)
def test_set_values(self, category_data):
    """Exercise setValue() on existing rows, past the last row, and on a bad attribute."""
    category = DataCategory('A', category_data['attributeListMiss'], category_data['rowListUnicodeMiss'])
    for rowIdx in range(category.getRowCount()):
        category.setValue('newValue', attributeName='colM1', rowIndex=rowIdx)
    # Writing beyond the current last row is expected to succeed.
    beyondEnd = category.getRowCount() + 5
    assert category.setValue('newValue', attributeName='colM1', rowIndex=beyondEnd)
    # An attribute that is not in the category must be rejected.
    with pytest.raises(ValueError):
        category.setValue('newValue', 'colX', 0)
def test_edit_remove_row(self, category_data):
    """Remove the first row repeatedly until the category is empty."""
    category = DataCategory(
        'A',
        category_data['attributeList'],
        category_data['rowListUnicode'],
        raiseExceptions=True,
    )
    for _ in range(category.getRowCount()):
        countBefore = category.getRowCount()
        category.removeRow(0)
        # Each removal must shrink the category by exactly one row.
        assert countBefore - 1 == category.getRowCount()
    assert 0 == category.getRowCount()
def testEditRowAccessors(self):
    """Test case - row accessors

    Each row accessor must raise IndexError for a row index past the end.
    """
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListAsciiA)
        accessors = (category.getRow, category.getRowAttributeDict, category.getRowItemDict)
        for accessor in accessors:
            self.assertRaises(IndexError, accessor, category.getRowCount() + 1)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_edit_extend(self, category_data):
    """Append an attribute with row extension and check the last row's new cell."""
    category = DataCategory('A', category_data['attributeList'], category_data['rowListAsciiA'])
    attCount = len(category.getAttributeList())
    newCount = category.appendAttributeExtendRows('colNew')
    assert newCount == attCount + 1
    # The added column is expected to hold the "?" placeholder.
    lastRow = category.getRow(category.getRowCount() - 1)
    assert lastRow[attCount] == "?"
def testCompareValues(self):
    """Test case - compare object values -

    Compares a category with itself, with the "missing values" fixture
    (expected to differ only in colC and colD), and with a copy whose colD
    value was edited in one row (expected to differ only in colD).
    """
    try:
        reference = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        withMissing = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        attNames = reference.getAttributeList()
        self.assertGreaterEqual(len(attNames), 1)
        # A category compared with itself agrees on every attribute.
        for tup in reference.cmpAttributeValues(reference):
            self.assertEqual(tup[1], True)
        for tup in reference.cmpAttributeValues(withMissing):
            self.assertEqual(tup[1], tup[0] not in ["colC", "colD"])
        #
        edited = DataCategory("A", self.__attributeList, self.__rowListUnicode)
        wasSet = edited.setValue(
            u"134ĆćĈĉĊċČčĎďĐđĒēĠġĢģĤĥĦħĨxyz",
            attributeName="colD",
            rowIndex=edited.getRowCount() - 2,
        )
        self.assertTrue(wasSet)
        for tup in reference.cmpAttributeValues(edited):
            self.assertEqual(tup[1], tup[0] not in ["colD"])
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_compare_values(self, category_data):
    """Compare attribute values between categories.

    Compares a category with itself, with the "missing values" fixture
    (expected to differ only in colC and colD), and with a copy whose colD
    value was edited in one row (expected to differ only in colD).
    """
    dcU = DataCategory('A', category_data['attributeList'], category_data['rowListUnicode'])
    dcM = DataCategory('A', category_data['attributeListMiss'], category_data['rowListUnicodeMiss'])
    na = dcU.getAttributeList()
    assert len(na) >= 1
    # A category compared with itself agrees on every attribute.
    for tup in dcU.cmpAttributeValues(dcU):
        assert tup[1] == (tup[0] not in ['colC', 'colD']) or tup[1] == True  # noqa: E712
    for tup in dcU.cmpAttributeValues(dcM):
        assert tup[1] == (tup[0] not in ['colC', 'colD'])
    #
    dcX = DataCategory('A', category_data['attributeList'], category_data['rowListUnicode'])
    # The replacement value was previously stored as mojibake (UTF-8 bytes
    # decoded as cp1252); restored here to the intended Latin Extended-A text.
    assert dcX.setValue(
        u'134ĆćĈĉĊċČčĎďĐđĒēĠġĢģĤĥĦħĨxyz',
        attributeName='colD',
        rowIndex=dcX.getRowCount() - 2)
    for tup in dcU.cmpAttributeValues(dcX):
        assert tup[1] == (tup[0] not in ['colD'])
def testEditAttributes(self):
    """Test case - get and extend attribute names

    Checks the initial row index and current attribute, the ordered
    attribute listing, and the "?" placeholder returned by getFullRow()
    for a newly appended attribute and for a row index past the end.
    """
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListAsciiA)
        self.assertEqual(0, category.getRowIndex())
        self.assertEqual(None, category.getCurrentAttribute())
        #
        attCount = len(category.getAttributeList())
        orderedAtts = category.getAttributeListWithOrder()
        self.assertEqual(len(orderedAtts), attCount)
        attCount = len(category.getAttributeList())
        newCount = category.appendAttribute("ColNew")
        self.assertEqual(newCount, attCount + 1)
        firstRow = category.getFullRow(0)
        self.assertEqual(firstRow[attCount], "?")
        # A full row requested past the end is expected to be all placeholders.
        phantomRow = category.getFullRow(category.getRowCount() + 1)
        for cell in phantomRow:
            self.assertEqual(cell, "?")
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testEditRemoveRow(self):
    """Test case - remove rows

    Removes the first row repeatedly until the category is empty.
    """
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListUnicode, raiseExceptions=True)
        for _ in range(category.getRowCount()):
            countBefore = category.getRowCount()
            category.removeRow(0)
            # Each removal must shrink the category by exactly one row.
            self.assertEqual(countBefore - 1, category.getRowCount())
        self.assertEqual(0, category.getRowCount())
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSetValues(self):
    """Test case - value setters

    Exercises setValue() on existing rows, past the last row, and on an
    attribute that is not in the category.
    """
    try:
        category = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        for rowIdx in range(category.getRowCount()):
            category.setValue("newValue", attributeName="colM1", rowIndex=rowIdx)
        # Writing beyond the current last row is expected to succeed.
        beyondEnd = category.getRowCount() + 5
        self.assertTrue(category.setValue("newValue", attributeName="colM1", rowIndex=beyondEnd))
        self.assertRaises(ValueError, category.setValue, "newValue", "colX", 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_get_values(self, category_data):
    """Read fixture values back through the getters and check error cases.

    Every fixture value is read with getValue() and getValueOrDefault();
    then negative indexing and the exceptions raised by getValueOrDefault()
    for an out-of-range row and an unknown attribute are checked.
    """
    category = DataCategory('A', category_data['attributeList'], category_data['rowListUnicode'])
    attNames = category.getAttributeList()
    print("Row length %r " % category.getRowCount())
    for pos, expected in enumerate(category_data['testRowUnicode']):
        # Fixture values line up with the attributes after the first one.
        attName = attNames[pos + 1]
        for rowIdx in range(category.getRowCount()):
            print("ii %d j %d at %s val %r " % (pos, rowIdx, attName, expected))
            assert category.getValue(attName, rowIdx) == expected
            assert category.getValueOrDefault(attName, rowIdx, 'mydefault') == expected
    #
    # negative indices are interpreted in the python manner
    lastOrdinal = category.getValueOrDefault('colOrd', -1, 'default')
    assert lastOrdinal == 9
    with pytest.raises(IndexError):
        category.getValueOrDefault('colOrd', category.getRowCount() + 1, 0)
    with pytest.raises(ValueError):
        category.getValueOrDefault('badAtt', 0, 0)
def testCondSelectValues(self):
    """Test case - value selections -

    Every fixture value is expected to match all rows of its attribute, and
    selecting any window of 1 to 4 attributes, conditioned on the last
    fixture value/attribute pair, is expected to return one entry per row.
    """
    try:
        category = DataCategory("A", self.__attributeListMiss, self.__rowListUnicodeMiss)
        atL = category.getAttributeList()
        for pos, attName in enumerate(atL[1:]):
            matches = category.selectIndices(self.__testRowUnicodeMiss[pos], attName)
            self.assertEqual(len(matches), category.getRowCount())
        # Slide windows of increasing width over the attribute list.
        for width in range(1, 5):
            for attWindow in window(atL, size=width):
                selected = category.selectValueListWhere(
                    attWindow,
                    self.__testRowUnicodeMiss[-1],
                    self.__attributeListMiss[-1],
                )
                self.assertEqual(len(selected), category.getRowCount())
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_replace_values(self, category_data):
    """Replace fixture values, then rewrite the replacement text.

    For attributes 3 through 6 of the "missing values" fixture, replace the
    fixture value with 'newVal' and then rewrite that text to 'nextVal'.
    Each call is expected to report a count equal to the row count.
    """
    attNames = category_data['attributeListMiss']
    rowValues = category_data['testRowUnicodeMiss']
    category = DataCategory('A', attNames, category_data['rowListUnicodeMiss'])
    # The fixture value index trails the attribute index by one.
    for attIndex in range(3, 7):
        replaced = category.replaceValue(rowValues[attIndex - 1], 'newVal', attNames[attIndex])
        assert replaced == category.getRowCount()
    for attIndex in range(3, 7):
        replaced = category.replaceSubstring('newVal', 'nextVal', attNames[attIndex])
        assert replaced == category.getRowCount()
def testEditExtend(self):
    """Test case - category extension methods

    Appends an attribute with row extension and checks the new cell in the
    last row.
    """
    try:
        category = DataCategory("A", self.__attributeList, self.__rowListAsciiA)
        attCount = len(category.getAttributeList())
        newCount = category.appendAttributeExtendRows("colNew")
        self.assertEqual(newCount, attCount + 1)
        # The added column is expected to hold the "?" placeholder.
        lastRow = category.getRow(category.getRowCount() - 1)
        self.assertEqual(lastRow[attCount], "?")
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def test_edit_attributes(self, category_data):
    """Check attribute listing and extension.

    Verifies the initial row index and current attribute, the ordered
    attribute listing, and the "?" placeholder returned by getFullRow()
    for a newly appended attribute and for a row index past the end.
    """
    dcA = DataCategory('A', category_data['attributeList'], category_data['rowListAsciiA'])
    assert 0 == dcA.getRowIndex()
    # Identity test: comparing to the None singleton with == is unreliable.
    assert dcA.getCurrentAttribute() is None
    #
    na = len(dcA.getAttributeList())
    tL = dcA.getAttributeListWithOrder()
    assert len(tL) == na
    na = len(dcA.getAttributeList())
    assert dcA.appendAttribute("ColNew") == na + 1
    row = dcA.getFullRow(0)
    assert row[na] == "?"
    # A full row requested past the end is expected to be all placeholders.
    row = dcA.getFullRow(dcA.getRowCount() + 1)
    for c in row:
        assert c == "?"
def __extractExtra(self, xrt):
    """Separately extract and parse data from the input document related to EM graphs.

    Walks the children of the "EM_validation" element and builds one
    DataCategory per recognized section, plus two shared categories:
    "pdbx_vrpt_em_2d_graph_info" (one row per graph, appended only when it
    has rows) and "pdbx_vrpt_em_2d_graph_data" (always appended last).
    Failures inside a graph section are logged and that section is skipped.

    Args:
        xrt: ElementTree root element

    Returns:
        (list): DataCategory objects
    """
    rL = []
    elV = xrt.find("EM_validation")
    # Explicit None/len test keeps the original outcome (missing or childless
    # element -> empty result) without truth-testing an Element, which is
    # deprecated since Python 3.12.
    if elV is None or len(elV) == 0:
        return rL
    #
    graphDataL = []
    gAtList = [
        "graph_data_id",
        "graph_id",
        "title",
        "x_axis_title",
        "x_axis_scale",
        "x_axis_units",
        "y_axis_title",
        "y_axis_scale",
        "y_axis_units",
    ]
    infObj = DataCategory("pdbx_vrpt_em_2d_graph_info", gAtList)
    # Single-graph sections: element tag (also used as graph_id) ->
    # (graph_data_id, category name).
    simpleGraphD = {
        "map_value_distribution": ("d_mvd", "pdbx_vrpt_em_graph_map_value_distribution"),
        "volume_estimate": ("d_ve", "pdbx_vrpt_em_graph_volume_estimate"),
        "rotationally_averaged_power_spectrum": (
            "d_raps",
            "pdbx_vrpt_em_graph_rotationally_averaged_power_spectrum",
        ),
    }
    #
    for el in elV:
        logger.debug("-- EM element tag %r attrib count (%d): %r", el.tag, len(el.attrib), list(el.attrib))
        if el.tag == "RecommendedContourLevel" and "value" in el.attrib:
            cObj = DataCategory("pdbx_vrpt_em_details", attributeNameList=["ordinal", "recommended_contour_level"])
            cObj.setValue(1, "ordinal", 0)
            cObj.setValue(el.attrib["value"], "recommended_contour_level", 0)
            rL.append(cObj)
        elif el.tag in simpleGraphD:
            try:
                gdId, catName = simpleGraphD[el.tag]
                dL = self.__getGraphDataElements(el, graphDataId=gdId)
                cgD = self.__getCommonGraphAttributes(el)
                if dL and cgD:
                    self.__emAddGraphInfoRow(infObj, cgD, el.tag, gdId)
                    cObj = DataCategory(catName, attributeNameList=["graph_id"])
                    cObj.setValue(el.tag, "graph_id", 0)
                    graphDataL.extend(dL)
                    rL.append(cObj)
            except Exception as e:
                logger.exception("Failing with %s", str(e))
        elif el.tag == "atom_inclusion":
            # backbone or all_atoms
            try:
                cObj = self.__emAtomInclusion(el, infObj, graphDataL)
                # cObj stays None when neither child yields data; previously
                # that raised AttributeError here and logged a spurious error.
                if cObj is not None and cObj.getRowCount():
                    rL.append(cObj)
            except Exception as e:
                logger.exception("Failing with %s", str(e))
        elif el.tag == "fsc":
            for ch in el:
                logger.debug(
                    "-- Child fsc element tag %r attrib count (%d): %r", ch.tag, len(ch.attrib), list(ch.attrib)
                )
                try:
                    if ch.tag == "resolution_intersections":
                        rObj = self.__emResolutionIntersections(ch)
                        if rObj.getRowCount():
                            rL.append(rObj)
                    elif ch.tag == "fsc_curves":
                        cObj = self.__emFscCurves(ch, infObj, graphDataL, indicator=False)
                        if cObj:
                            rL.append(cObj)
                    elif ch.tag == "fsc_indicator_curves":
                        cObj = self.__emFscCurves(ch, infObj, graphDataL, indicator=True)
                        if cObj:
                            rL.append(cObj)
                except Exception as e:
                    logger.exception("Failing with %s", str(e))
    #
    # -- end of element processing
    if infObj.getRowCount():
        rL.append(infObj)
    #
    dObj = DataCategory("pdbx_vrpt_em_2d_graph_data", ["ordinal", "graph_data_id", "x_value", "y_value"])
    for ii, dD in enumerate(graphDataL):
        for k, v in dD.items():
            dObj.setValue(v, k, ii)
        dObj.setValue(ii + 1, "ordinal", ii)
    rL.append(dObj)
    return rL


def __emAddGraphInfoRow(self, infObj, cgD, graphId, graphDataId):
    """Append one row for a graph (common attributes plus its ids) to the graph-info category."""
    iRow = infObj.getRowCount()
    for k, v in cgD.items():
        infObj.setValue(v, k, iRow)
    infObj.setValue(graphId, "graph_id", iRow)
    infObj.setValue(graphDataId, "graph_data_id", iRow)


def __emAtomInclusion(self, el, infObj, graphDataL):
    """Build the atom-inclusion category from the "all_atoms"/"backbone" children; return None if neither has data."""
    cObj = None
    for cN, abbrev in (("all_atoms", "aa"), ("backbone", "bb")):
        ch = el.find(cN)
        # Same outcome as the former `if ch:` (present and has children).
        if ch is None or len(ch) == 0:
            continue
        gId = "atom_inclusion_%s" % cN
        gdId = "d_ai_%s" % abbrev
        dL = self.__getGraphDataElements(ch, graphDataId=gdId)
        cgD = self.__getCommonGraphAttributes(ch)
        if dL and cgD:
            self.__emAddGraphInfoRow(infObj, cgD, gId, gdId)
            #
            if not cObj:
                cObj = DataCategory("pdbx_vrpt_em_graph_atom_inclusion", attributeNameList=["graph_id", "type"])
            tRow = cObj.getRowCount()
            cObj.setValue(gId, "graph_id", tRow)
            cObj.setValue(cN, "type", tRow)
            graphDataL.extend(dL)
    return cObj


def __emResolutionIntersections(self, ch):
    """Build the resolution-intersections category with one row per "intersection" child of ch."""
    atList = [
        "ordinal",
        "resolution_units",
        "spatial_frequency_units",
        "correlation",
        "resolution",
        "spatial_frequency",
        "curve",
        "type",
    ]
    rObj = DataCategory("pdbx_vrpt_em_resolution_intersections", atList)
    # Units are declared once on the parent; "?" marks a missing attribute.
    ru = ch.attrib.get("resolution_unit", "?")
    sfu = ch.attrib.get("spatial_frequency_unit", "?")
    ii = 0
    for gch in ch:
        if gch.tag != "intersection":
            continue
        for at in ("correlation", "resolution", "spatial_frequency", "curve", "type"):
            rObj.setValue(gch.attrib.get(at, "?"), at, ii)
        rObj.setValue(ru, "resolution_units", ii)
        rObj.setValue(sfu, "spatial_frequency_units", ii)
        rObj.setValue(ii + 1, "ordinal", ii)
        ii += 1
    return rObj


def __emFscCurves(self, ch, infObj, graphDataL, indicator):
    """Build the FSC curve category (or the indicator-curve variant) from ch; return None if no curve qualifies."""
    if indicator:
        childTag = "fsc_indicator_curve"
        gdFmt = "fsc_i_%d"
        catName = "pdbx_vrpt_em_graph_fsc_indicator_curve"
        atNameList = ["graph_id", "type", "curve_name", "data_curve_name"]
    else:
        childTag = "fsc_curve"
        gdFmt = "fsc_%d"
        catName = "pdbx_vrpt_em_graph_fsc_curve"
        atNameList = ["graph_id", "type", "curve_name"]
    iCount = 0
    cObj = None
    for gch in ch:
        if gch.tag != childTag:
            continue
        iCount += 1
        gdId = gdFmt % iCount
        dL = self.__getGraphDataElements(gch, graphDataId=gdId)
        cgD = self.__getCommonGraphAttributes(gch)
        curveName = gch.attrib.get("curve_name", "?")
        fscType = gch.attrib.get("type", "?")
        dataCurveName = gch.attrib.get("data_curve", "?") if indicator else None
        # A curve needs data, common attributes and a name; indicator curves
        # also need the name of the data curve they refer to.
        if not (dL and cgD) or curveName == "?":
            continue
        if indicator and dataCurveName == "?":
            continue
        # The curve name doubles as the graph id.
        self.__emAddGraphInfoRow(infObj, cgD, curveName, gdId)
        # NOTE(review): the row index follows the curve's position among all
        # matching children, not the number of accepted curves (unchanged).
        iRow = iCount - 1
        if not cObj:
            cObj = DataCategory(catName, attributeNameList=atNameList)
        cObj.setValue(curveName, "graph_id", iRow)
        cObj.setValue(fscType, "type", iRow)
        cObj.setValue(curveName, "curve_name", iRow)
        if indicator:
            cObj.setValue(dataCurveName, "data_curve_name", iRow)
        graphDataL.extend(dL)
    return cObj