def test_qistoph_pr_27(self):
    """ Tests support for Bool, Integer, Long classes (PR #27) """
    # Load the basic map
    raw = self.read_file("testBoolIntLong.ser")
    parsed = javaobj.loads(raw)
    _logger.debug(parsed)

    # Basic checking, table-driven
    expected = {
        u"key1": u"value1",
        u"key2": u"value2",
        u"int": 9,
        u"int2": 10,
        u"bool": True,
        u"bool2": True,
    }
    for key, value in expected.items():
        self.assertEqual(parsed[key], value)

    # Load the parent map: its "subMap" entry must mirror the basic map
    raw2 = self.read_file("testBoolIntLong-2.ser")
    parsed2 = javaobj.loads(raw2)
    _logger.debug(parsed2)

    parent_map = parsed2[u"subMap"]
    for key, value in parsed.items():
        self.assertEqual(parent_map[key], value)
def test_jceks_issue_5(self):
    """ Tests the handling of JCEKS issue #5 """
    # Parsing must succeed without raising; the result is only logged
    parsed = javaobj.loads(self.read_file("jceks_issue_5.ser"))
    _logger.info(parsed)
def _convert_java_value(kv, include_non_primitives=True):
    """
    Deserializes a Java-serialized key-value payload into a Python value.

    Primitive results are returned as-is; wrapper objects exposing a
    ``value`` attribute are unwrapped. Other objects are rendered as a
    JSON dump plus the raw hexadecimal payload, unless
    ``include_non_primitives`` is False, in which case None is returned.
    On parse failure, falls back to the raw value (or None).
    """
    try:
        parsed = javaobj.loads(kv.serialized_value)

        # Native Python primitives pass straight through
        if isinstance(parsed, (str, int, float, bool)):
            return parsed

        # Java wrapper types (Boolean, Integer, ...) expose .value
        if hasattr(parsed, 'value'):
            return parsed.value

        if not include_non_primitives:
            return None

        # Non-primitive Java objects: JSON rendering of the object graph
        # plus the serialized bytes as a hexadecimal string
        return {
            'value': json.dumps(
                parsed,
                default=lambda field: field.__dict__,
                sort_keys=True,
                indent=2),
            'raw': kv.serialized_value.hex(),
        }
    except Exception:
        Log.exception("Failed to parse data as java object")
        return _raw_value(kv) if include_non_primitives else None
def test_arrays(self):
    """ Tests handling of Java arrays """
    raw = self.read_file("objArrays.ser")
    parsed = javaobj.loads(raw)
    _logger.debug(parsed)

    # Log the class description of the parsed object
    classdesc = parsed.get_class()
    _logger.debug(classdesc)
    _logger.debug(classdesc.fields_names)
    _logger.debug(classdesc.fields_types)

    # Java side of the fixture:
    # public String[] stringArr = {"1", "2", "3"};
    # public int[] integerArr = {1,2,3};
    # public boolean[] boolArr = {true, false, true};
    # public TestConcrete[] concreteArr = {new TestConcrete(),
    #                                      new TestConcrete()};
    for attr in ("stringArr", "integerArr", "boolArr", "concreteArr"):
        _logger.debug(getattr(parsed, attr))

    self._try_marshalling(raw, parsed)
def test_class(self):
    """ Reads the serialized String class """
    raw = self.read_file("testClass.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read object: %s", parsed)
    # The serialized payload is the java.lang.String class object itself
    self.assertEqual(parsed.name, "java.lang.String")
    self._try_marshalling(raw, parsed)
def test_char_rw(self):
    """ Reads testChar.ser and checks the serialization process """
    raw = self.read_file("testChar.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read char object: %s", parsed)
    # A Java char is two bytes wide; 'C' decodes with a leading NUL
    self.assertEqual(parsed, "\x00C")
    self._try_marshalling(raw, parsed)
def _try_marshalling(self, original_stream, original_object):
    """ Tries to marshall an object and compares it to the original stream """
    _logger.debug("Try Marshalling")
    dumped = javaobj.dumps(original_object)
    try:
        # Reloading the new dump allows to compare the decoding sequence
        javaobj.loads(dumped)
        self.assertEqual(original_stream, dumped)
    except Exception:
        # On mismatch, print both streams side by side before re-raising
        print("-" * 80)
        print("=" * 30, "Original", "=" * 30)
        print(hexdump(original_stream))
        print("*" * 30, "Marshalled", "*" * 30)
        print(hexdump(dumped))
        print("-" * 80)
        raise
def test_boolean(self):
    """ Reads testBoolean.ser and checks the serialization process """
    raw = self.read_file("testBoolean.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read boolean object: %s", parsed)
    # A serialized Java boolean false round-trips as a NUL character
    self.assertEqual(parsed, chr(0))
    self._try_marshalling(raw, parsed)
def test_bytes_rw(self):
    """ Reads testBytes.ser and checks the serialization process """
    raw = self.read_file("testBytes.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read bytes: %s", parsed)
    self.assertEqual(parsed, "HelloWorld")
    self._try_marshalling(raw, parsed)
def test_double_rw(self):
    """ Reads testDouble.ser and checks the serialization process """
    raw = self.read_file("testDouble.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read double object: %s", parsed)
    # The fixture's double decodes to this 8-byte latin-1 string
    self.assertEqual(parsed, "\x7f\xef\xff\xff\xff\xff\xff\xff")
    self._try_marshalling(raw, parsed)
def test_chars_gzip(self):
    """ Reads testChars.ser.gz """
    # Expected string as a UTF-16 string
    expected = "python-javaobj".encode("utf-16-be").decode("latin1")

    parsed = javaobj.loads(self.read_file("testChars.ser.gz"))
    _logger.debug("Read char objects: %s", parsed)
    self.assertEqual(parsed, expected)
def test_chars_rw(self):
    """ Reads testChars.ser and checks the serialization process """
    # Expected string as a UTF-16 string
    expected = "python-javaobj".encode("utf-16-be").decode("latin1")

    raw = self.read_file("testChars.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read char objects: %s", parsed)
    self.assertEqual(parsed, expected)
    self._try_marshalling(raw, parsed)
def test_byte(self):
    """
    Reads testByte.ser

    The result from javaobj is a single-character string.
    """
    raw = self.read_file("testByte.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read Byte: %r", parsed)
    # Byte.MAX_VALUE (127) comes back as a one-character string
    self.assertEqual(parsed, chr(127))
    self._try_marshalling(raw, parsed)
def test_japan(self):
    """ Tests the UTF encoding handling with Japanese characters """
    # Japan.ser contains a string using wide characters: the name of the
    # state from Japan (according to wikipedia)
    raw = self.read_file("testJapan.ser")
    parsed = javaobj.loads(raw)
    _logger.debug(parsed)

    # Compare the UTF-8 encoded version of the name
    expected = b"\xe6\x97\xa5\xe6\x9c\xac\xe5\x9b\xbd".decode("utf-8")
    self.assertEqual(parsed, expected)
    self._try_marshalling(raw, parsed)
def test_collections(self):
    """ Tests the handling of ArrayList, LinkedList and HashMap """
    parsed = javaobj.loads(self.read_file("objCollections.ser"))
    _logger.debug(parsed)

    # Each Java collection must convert to the matching Python type
    for attr, expected_type in (
        ("arrayList", list),
        ("hashMap", dict),
        ("linkedList", list),
    ):
        value = getattr(parsed, attr)
        _logger.debug("%s: %s" % (attr, value))
        self.assertTrue(isinstance(value, expected_type))
def test_2d_array(self):
    """ Tests the handling of a 2D array """
    parsed = javaobj.loads(self.read_file("test2DArray.ser"))
    _logger.debug(parsed)
    # Nested Java arrays become nested Python lists
    expected = [[1, 2, 3], [4, 5, 6]]
    self.assertEqual(parsed, expected)
def test_class_with_byte_array_rw(self):
    """ Tests handling of classes containing a Byte Array """
    raw = self.read_file("testClassWithByteArray.ser")
    parsed = javaobj.loads(raw)

    # j8spencer (Google, LLC) 2018-01-16: It seems specific support for
    # byte arrays was added, but is a little out-of-step with the other
    # types in terms of style. This UT was broken, since the "myArray"
    # member has the array stored as a tuple of ints (not a byte string)
    # in a member called '_data'. Updated to pass the UTs.
    self.assertEqual(parsed.myArray._data, (1, 3, 7, 11))
    self._try_marshalling(raw, parsed)
def test_sets(self):
    """ Tests handling of HashSet and TreeSet """
    filenames = (
        "testHashSet.ser",
        "testTreeSet.ser",
        "testLinkedHashSet.ser",
    )
    for filename in filenames:
        _logger.debug("Loading file: %s", filename)
        parsed = javaobj.loads(self.read_file(filename))
        _logger.debug(parsed)

        # Every flavour of Java set converts to a Python set
        self.assertIsInstance(parsed, set)
        self.assertSetEqual({entry.value for entry in parsed}, {1, 2, 42})
def test_times(self):
    """ Tests the handling of java.time classes """
    parsed = javaobj.loads(self.read_file("testTime.ser"))
    _logger.debug(parsed)

    # First one is a duration of 10s
    self.assertEqual(parsed[0].second, 10)

    # Check types: the container is a JavaArray of JavaTime entries
    self.assertIsInstance(parsed, javaobj.beans.JavaArray)
    for entry in parsed:
        self.assertIsInstance(
            entry, javaobj.DefaultObjectTransformer.JavaTime)
def test_super(self):
    """ Tests basic class inheritance handling """
    raw = self.read_file("objSuper.ser")
    parsed = javaobj.loads(raw)
    _logger.debug(parsed)

    classdesc = parsed.get_class()
    _logger.debug(classdesc)
    _logger.debug(classdesc.fields_names)
    _logger.debug(classdesc.fields_types)

    # Fields from both the child and the parent class must be visible
    for attr, expected in (
        ("childString", u"Child!!"),
        ("bool", True),
        ("integer", -1),
        ("superString", u"Super!!"),
    ):
        self.assertEqual(getattr(parsed, attr), expected)

    self._try_marshalling(raw, parsed)
def test_enums(self):
    """ Tests the handling of "enum" types """
    parsed = javaobj.loads(self.read_file("objEnums.ser"))
    _logger.debug(parsed)

    classdesc = parsed.get_class()
    _logger.debug(classdesc)
    _logger.debug(classdesc.fields_names)
    _logger.debug(classdesc.fields_types)
    self.assertEqual(classdesc.name, "ClassWithEnum")

    # Single enum field
    self.assertEqual(parsed.color.classdesc.name, "Color")
    self.assertEqual(parsed.color.constant, u"GREEN")

    # Array of enum constants, in declaration order
    for entry, constant in zip(parsed.colors, (u"GREEN", u"BLUE", u"RED")):
        self.assertEqual(entry.classdesc.name, "Color")
        self.assertEqual(entry.constant, constant)
def test_char_array(self):
    """ Tests the loading of a wide-char array """
    # Expected decoding, including surrogate halves kept as-is
    expected = [
        u"\u0000",
        u"\ud800",
        u"\u0001",
        u"\udc00",
        u"\u0002",
        u"\uffff",
        u"\u0003",
    ]

    raw = self.read_file("testCharArray.ser")
    parsed = javaobj.loads(raw)
    _logger.debug(parsed)
    self.assertEqual(parsed, expected)
    self._try_marshalling(raw, parsed)
def test_fields(self):
    """ Reads a serialized object and checks its fields """
    raw = self.read_file("test_readFields.ser")
    parsed = javaobj.loads(raw)
    _logger.debug("Read object: %s", parsed)

    # Field values
    self.assertEqual(parsed.aField1, u"Gabba")
    self.assertEqual(parsed.aField2, None)

    # Class description
    classdesc = parsed.get_class()
    self.assertTrue(classdesc)
    self.assertEqual(classdesc.serialVersionUID, 0x7F0941F5)
    self.assertEqual(classdesc.name, "OneTest$SerializableTestHelper")

    _logger.debug("Class..........: %s", classdesc)
    _logger.debug(".. Flags.......: %s", classdesc.flags)
    _logger.debug(".. Fields Names: %s", classdesc.fields_names)
    _logger.debug(".. Fields Types: %s", classdesc.fields_types)
    self.assertEqual(len(classdesc.fields_names), 3)

    self._try_marshalling(raw, parsed)
def read_harvester_settings_file(self, path: str) -> Dict[str, Dict[str, str]]:
    """Returns a dictionary representing the harvester settings.

    First, tries reading the settings as if the source file is UTF-8
    encoded JSON of the following form (used for testing):

    {
        "harvester_settings_key_1": {
            "repository_name": "repository_name_1",
            "base_url": "http://example.edu/oai2",
            "set_spec": "set_spec_1",
            "split_by_set": False
        },
        ...
    }

    If that fails, tries reading the settings as if the source file is a
    serialized java.util.Hashtable instance from jOAI (used for
    production).

    Returns an empty dict when the file does not exist; raises
    IndexerError on invalid JSON or any other failure in the JSON path.
    """
    try:
        # See if it's in JSON already.
        with open(path, 'r') as harvester_settings_file:
            # Make sure we transform the key before storing.
            return {
                self.get_harvester_settings_key(key): metadata
                for key, metadata in json.load(
                    harvester_settings_file).items()
            }
    except JSONDecodeError as e:
        # Invalid JSON.
        raise IndexerError(
            'Cannot load scheduled harvests settings: {}'.format(e))
    except FileNotFoundError as e:
        # This file won't exist when no harvests have been scheduled, so it's probably fine.
        logging.debug(
            'Scheduled harvests settings file does not exist: {}'.format(
                path))
        return {}
    except UnicodeDecodeError as e:
        # Not UTF-8 text: assume the jOAI binary format.
        logging.debug('Config file is not JSON: {}'.format(e))
        # Open the file in binary mode and try to parse it with javaobj.
        # NOTE(review): exceptions raised in this handler are NOT caught
        # by the trailing `except Exception` clause below (sibling
        # handlers do not catch each other), so javaobj/attribute errors
        # here propagate as-is — confirm this is intended.
        with open(path, 'rb') as harvester_settings_file:
            pobj = javaobj.loads(harvester_settings_file.read())
            # Keep only entries whose string form mentions the jOAI
            # scheduled-harvest class name.
            is_scheduled_harvest = lambda h: JOAI_SCHEDULED_HARVEST_CLASSNAME in str(
                h)
            # Key each harvest by its harvest directory path, transformed
            # the same way as the JSON keys above.
            return {
                self.get_harvester_settings_key(pobj_harvest.harvestDir.path):
                {
                    'repository_name': pobj_harvest.repositoryName,
                    'base_url': pobj_harvest.baseURL,
                    'set_spec': pobj_harvest.setSpec,
                    'split_by_set': pobj_harvest.splitBySet
                }
                for pobj_harvest in list(
                    filter(is_scheduled_harvest, pobj.annotations))
            }
    except Exception as e:
        # Something else went wrong.
        raise IndexerError(
            'Cannot load scheduled harvests settings: {}'.format(e))
def extract_logical_plan(self, topology):
    """
    Returns the representation of logical plan that will be returned
    from Tracker.

    Builds a dict with "spouts" and "bolts" keys from the topology's
    protobuf components, deserializing Java-serialized config values
    with javaobj and formatting any extra links against the topology's
    execution state.
    """
    logicalPlan = {
        "spouts": {},
        "bolts": {},
    }

    # Add spouts.
    for spout in topology.spouts():
        # Defaults used when the corresponding config key is absent.
        spoutType = "default"
        spoutSource = "NA"
        spoutVersion = "NA"
        spoutConfigs = spout.comp.config.kvs
        spoutExtraLinks = []
        for kvs in spoutConfigs:
            # Each value is a Java-serialized blob; decode with javaobj.
            if kvs.key == "spout.type":
                spoutType = javaobj.loads(kvs.serialized_value)
            elif kvs.key == "spout.source":
                spoutSource = javaobj.loads(kvs.serialized_value)
            elif kvs.key == "spout.version":
                spoutVersion = javaobj.loads(kvs.serialized_value)
            elif kvs.key == "extra.links":
                # The decoded value is itself a JSON document.
                spoutExtraLinks = json.loads(javaobj.loads(kvs.serialized_value))

        spoutPlan = {
            "config": convert_pb_kvs(spoutConfigs, include_non_primitives=False),
            "type": spoutType,
            "source": spoutSource,
            "version": spoutVersion,
            "outputs": [
                {"stream_name": outputStream.stream.id} for outputStream in spout.outputs
            ],
            "extra_links": spoutExtraLinks,
        }
        logicalPlan["spouts"][spout.comp.name] = spoutPlan

        # render component extra links with general params
        execution_state = {
            "cluster": topology.execution_state.cluster,
            "environ": topology.execution_state.environ,
            "role": topology.execution_state.role,
            "jobname": topology.name,
            "submission_user": topology.execution_state.submission_user,
        }

        # Expand each link's URL template against the execution state.
        for link in spoutPlan["extra_links"]:
            link[EXTRA_LINK_URL_KEY] = self.config.get_formatted_url(link[EXTRA_LINK_FORMATTER_KEY],
                                                                     execution_state)

    # Add bolts.
    for bolt in topology.bolts():
        boltName = bolt.comp.name
        logicalPlan["bolts"][boltName] = {
            "config": convert_pb_kvs(bolt.comp.config.kvs, include_non_primitives=False),
            "outputs": [
                {"stream_name": outputStream.stream.id} for outputStream in bolt.outputs
            ],
            "inputs": [
                {
                    "stream_name": inputStream.stream.id,
                    "component_name": inputStream.stream.component_name,
                    "grouping": topology_pb2.Grouping.Name(inputStream.gtype),
                }
                for inputStream in bolt.inputs
            ]
        }

    return logicalPlan