def setUp(self):
     """Load the TCCDMDatum schema and build the serializer/deserializer.

     The schema file is loaded twice, once for the reader schema and once
     for the writer schema.
     """
     here = os.path.dirname(os.path.realpath(__file__))
     schema_path = here + "/TCCDMDatum.avsc"

     self.reader_schema = Utils.load_schema(schema_path)
     self.writer_schema = Utils.load_schema(schema_path)

     # Serializer uses the writer schema; the deserializer is handed only
     # the reader schema.
     self.serializer = AvroGenericSerializer(self.writer_schema)
     self.deserializer = AvroGenericDeserializer(self.reader_schema)
    def test_ambiguous_cdm18_parsing(self):
        """Check union-branch detection after a byte round trip.

        A FileObject record has the optional attributes listed below
        removed; it, a SrcSinkObject record and a RegistryKeyObject record
        are then serialized to bytes, deserialized again, and the parser
        must report the matching union branch name for each of them.
        """
        deserializer = AvroGenericDeserializer(self.reader_schema,
                                               self.reader_schema)
        fog = record_generator.CDMFileObjectGenerator(self.serializer)
        ssog = record_generator.CDMSrcSinkObjectGenerator(self.serializer)
        rkog = record_generator.CDMRegistryKeyObjectGenerator(self.serializer)
        parser = parsing.CDMParser(self.reader_schema)

        file_object_record = fog.generate_random_record(5)
        # Remove all of the unique optional attributes
        for optional_key in ("localPrincipal", "size", "peInfo", "hashes"):
            file_object_record["datum"].pop(optional_key, None)

        src_sink_object_record = ssog.generate_random_record(5)
        registry_key_object_record = rkog.generate_random_record(5)

        expected_branches = ["FileObject", "SrcSinkObject",
                             "RegistryKeyObject"]
        records = [file_object_record, src_sink_object_record,
                   registry_key_object_record]

        # Serialize everything first, then deserialize everything, so the
        # serializer and deserializer are driven in the same order as before.
        serialized = [self.serializer.serialize_to_bytes(record)
                      for record in records]
        round_tripped = [deserializer.deserialize_bytes(blob)
                         for blob in serialized]

        for record, expected in zip(round_tripped, expected_branches):
            # assertEqual reports both values when the branch is wrong.
            self.assertEqual(parser.get_union_branch_type(record), expected)
Пример #3
0
    def test_serialization_nested(self):
        """Run the serialization helper against the LabeledEdge schema.

        The serializer and deserializer on ``self`` are replaced with ones
        built from that schema before the helper is called with
        ``is_union`` set to False.
        """
        test_dir = os.path.dirname(os.path.realpath(__file__))
        edge_schema = Utils.load_schema(test_dir + "/LabeledEdge.avsc")

        self.serializer = AvroGenericSerializer(edge_schema)
        self.deserializer = AvroGenericDeserializer(edge_schema, edge_schema)
        self.serialization_test_helper(edge_schema, False)
Пример #4
0
    def serialization_test_helper(self, schema, is_union):
        """Round-trip two nodes and an edge through Avro files on disk.

        Args:
            schema: Schema passed to ``Utils.create_node`` and
                ``Utils.create_edge`` when building the test records.
            is_union: When true, the nodes are also written, read back and
                compared; otherwise only the edge is exercised.

        The files are written with ``self.writer_schema`` and read back
        with ``self.reader_schema`` / ``self.writer_schema``.  They live
        next to this test module and are removed even when a step fails,
        so a failing run does not leave ``.avro`` files behind.
        """
        base_dir = os.path.dirname(os.path.realpath(__file__))
        node_file_path = base_dir + "/testNodes.avro"
        edge_file_path = base_dir + "/testEdges.avro"

        # Create some nodes and an edge.
        node1 = Utils.create_node(1, "unitOfExecution", schema, True)
        node2 = Utils.create_node(2, "artifact", schema, True)
        edge1 = Utils.create_edge(node1, node2, "read", schema)

        # Paths this run may have created; cleaned up in the finally block.
        created_paths = []
        try:
            if is_union:
                # Serialize the nodes to a file.
                created_paths.append(node_file_path)
                with open(node_file_path, "wb") as node_file:
                    self.serializer = AvroGenericSerializer(
                        self.writer_schema, node_file)
                    self.serializer.serialize_to_file([node1, node2])
                    self.serializer.close_file_serializer()

            # Serialize the edge to a file.
            created_paths.append(edge_file_path)
            with open(edge_file_path, "wb") as edge_file:
                self.serializer = AvroGenericSerializer(self.writer_schema,
                                                        edge_file)
                self.serializer.serialize_to_file([edge1])
                self.serializer.close_file_serializer()

            if is_union:
                # Deserialize from the node file to records.
                with open(node_file_path, "rb") as node_file:
                    self.deserializer = AvroGenericDeserializer(
                        self.reader_schema, self.writer_schema, node_file)
                    deserialized_nodes = \
                            self.deserializer.deserialize_from_file()
                    self.deserializer.close_file_deserializer()

            # Deserialize from the edge file to records.
            with open(edge_file_path, "rb") as edge_file:
                self.deserializer = AvroGenericDeserializer(
                    self.reader_schema, self.writer_schema, edge_file)
                deserialized_edges = \
                        self.deserializer.deserialize_from_file()
                self.deserializer.close_file_deserializer()

            if is_union:
                # Check the deserialized nodes.
                self.assertEqual(len(deserialized_nodes), 2)
                self.compare_nodes(node1, deserialized_nodes[0])
                self.compare_nodes(node2, deserialized_nodes[1])

            # Check the deserialized edges.
            self.assertEqual(len(deserialized_edges), 1)
            self.compare_edges(edge1, deserialized_edges[0])
        finally:
            # Clean up the files, whether or not the checks passed.  The
            # existence guard covers the case where open() itself failed.
            for path in created_paths:
                if os.path.exists(path):
                    os.remove(path)
Пример #5
0
    def setUp(self):
        """Load LabeledGraph.avsc and prepare schemas and (de)serializer.

        The one loaded schema is used as both reader and writer schema;
        the node and edge schemas are looked up in it by full name.
        """
        here = os.path.dirname(os.path.realpath(__file__))
        graph_schema = Utils.load_schema(here + "/LabeledGraph.avsc")

        # Reader and writer refer to the same loaded schema object.
        self.reader_schema = self.writer_schema = graph_schema

        find_schema = Utils.get_schema_by_name
        self.node_schema = find_schema(
            self.writer_schema, TestUnionSchema._NODE_SCHEMA_FULLNAME)
        self.edge_schema = find_schema(
            self.writer_schema, TestUnionSchema._EDGE_SCHEMA_FULLNAME)

        self.serializer = AvroGenericSerializer(self.writer_schema)
        self.deserializer = AvroGenericDeserializer(
            self.reader_schema, self.writer_schema)
Пример #6
0
 def test_serialization_union(self):
     """Run the union helper with LabeledGraphv2.avsc as reader schema.

     ``self.writer_schema`` is used as-is; only the reader schema and the
     deserializer on ``self`` are replaced before the helper runs.
     """
     test_dir = os.path.dirname(os.path.realpath(__file__))
     v2_schema_path = test_dir + "/LabeledGraphv2.avsc"
     self.reader_schema = Utils.load_schema(v2_schema_path)

     self.deserializer = AvroGenericDeserializer(
         self.reader_schema, self.writer_schema)
     self.serialization_test_helper(self.writer_schema, True)
Пример #7
0
class TestUnionSchema(unittest.TestCase):
    """
    Test that serializing and deserializing when using a union schema
    works as expected.
    """

    _NODE_SCHEMA_FULLNAME = "com.bbn.tc.schema.avro.LabeledNode"
    _EDGE_SCHEMA_FULLNAME = "com.bbn.tc.schema.avro.LabeledEdge"

    def setUp(self):
        schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledGraph.avsc"
        schema = Utils.load_schema(schema_file)

        self.reader_schema = schema
        self.writer_schema = schema
        self.node_schema = Utils.get_schema_by_name(
            self.writer_schema, TestUnionSchema._NODE_SCHEMA_FULLNAME)
        self.edge_schema = Utils.get_schema_by_name(
            self.writer_schema, TestUnionSchema._EDGE_SCHEMA_FULLNAME)

        self.serializer = AvroGenericSerializer(self.writer_schema)
        self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                    self.writer_schema)

    def test_union_schema(self):
        schema = self.writer_schema
        node1 = Utils.create_node(1, "unitOfExecution", schema, True)
        node2 = Utils.create_node(2, "agent", schema, True)
        edge1 = Utils.create_edge(node1, node2, "wasAssociatedWith", schema,
                                  True)

        serialized_node = self.serializer.serialize_to_bytes(node1)
        serialized_edge = self.serializer.serialize_to_bytes(edge1)

        deserialized_node = self.deserializer.deserialize_bytes(
            serialized_node)
        deserialized_edge = self.deserializer.deserialize_bytes(
            serialized_edge)

        # Don't convert these to strings, like in the Java code.  That results
        # in differences due to unicode strings for keys in the
        # Avro-deserialized in the Python 2.7 version, and we don't really
        # want to deal with that.
        self.assertTrue(node1 == deserialized_node)
        self.assertTrue(edge1 == deserialized_edge)
Пример #8
0
 def setUp(self):
     """Load LabeledGraph.avsc twice and build the (de)serializer pair.

     One load provides the reader schema, the other the writer schema.
     """
     module_dir = os.path.dirname(os.path.realpath(__file__))
     graph_schema_path = module_dir + "/LabeledGraph.avsc"

     self.reader_schema = Utils.load_schema(graph_schema_path)
     self.writer_schema = Utils.load_schema(graph_schema_path)

     self.serializer = AvroGenericSerializer(self.writer_schema)
     self.deserializer = AvroGenericDeserializer(
         self.reader_schema, self.writer_schema)
Пример #9
0
class TestJsonSerialization(TestBase):
    """
    Test of serializing and deserializing records to and from json.
    """
    def setUp(self):
        """Load LabeledEdge.avsc and build a serializer/deserializer."""
        schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledEdge.avsc"
        self.schema = Utils.load_schema(schema_file)
        self.serializer = AvroGenericSerializer(self.schema)
        self.deserializer = AvroGenericDeserializer(self.schema, self.schema)

    def test_serialization(self):
        """Check that an edge survives a JSON round trip unchanged."""
        # NOTE(review): these calls pass (True, schema) as the trailing
        # arguments -- confirm the order against the Utils signatures.
        node1 = Utils.create_node(1, "unitOfExecution", True, self.schema)
        node2 = Utils.create_node(2, "artifact", True, self.schema)
        edge = Utils.create_edge(node1, node2, "read", True, self.schema)

        # Make sure serialization and deserialization is symmetric.
        json_edge = self.serializer.serialize_to_json(edge, True)
        deserialized_edge = self.deserializer.deserialize_json(json_edge)
        self.assertEqual(edge, deserialized_edge)

        # Make sure that the serializer can serialize to both bytes and json
        # without corrupting any internal state.  Also, test without making
        # the json serialization prettified.
        edge = Utils.create_edge(node1, node2, "modified", True, self.schema)
        self.serializer.serialize_to_bytes(edge)
        json_edge = self.serializer.serialize_to_json(edge)
        deserialized_edge = self.deserializer.deserialize_json(json_edge)
        self.assertEqual(edge, deserialized_edge)

    def test_bad_record_serialization(self):
        """Serializing a record that does not fit the schema must raise."""
        bad_edge = {"test": "bad"}
        with self.assertRaises(avro.io.AvroTypeException):
            self.serializer.serialize_to_json(bad_edge)

    def test_bad_deserialized_record(self):
        """Deserializing JSON that does not fit the schema must raise."""
        bad_serialized_edge = '{"test": "bad"}'
        with self.assertRaises(avro.io.AvroTypeException):
            self.deserializer.deserialize_json(bad_serialized_edge)
Пример #10
0
class TestByteSerialization(TestBase):
    """
    Round-trip tests that serialize records to bytes and read them back.
    """
    def test_serialization_union(self):
        """Exercise nodes and an edge using ``self.writer_schema``."""
        self.serialization_test_helper(self.writer_schema, True)

    def test_serialization_nested(self):
        """Exercise only the edge, using the LabeledEdge schema."""
        test_dir = os.path.dirname(os.path.realpath(__file__))
        edge_schema = Utils.load_schema(test_dir + "/LabeledEdge.avsc")

        self.serializer = AvroGenericSerializer(edge_schema)
        self.deserializer = AvroGenericDeserializer(edge_schema, edge_schema)
        self.serialization_test_helper(edge_schema, False)

    def serialization_test_helper(self, schema, is_union):
        """Serialize records to bytes, deserialize them, and compare.

        Args:
            schema: Schema passed to ``Utils.create_node`` and
                ``Utils.create_edge`` when building the records.
            is_union: When true, the two nodes are round-tripped and
                compared before the edge; otherwise only the edge is.
        """
        first_node = Utils.create_node(1, "unitOfExecution", schema, True)
        second_node = Utils.create_node(2, "artifact", schema, True)
        edge = Utils.create_edge(first_node, second_node, "read", schema)

        if is_union:
            # Each node is serialized, read back and checked in turn.
            for node in (first_node, second_node):
                encoded = self.serializer.serialize_to_bytes(node)
                decoded = self.deserializer.deserialize_bytes(encoded)
                self.compare_nodes(node, decoded)

        encoded_edge = self.serializer.serialize_to_bytes(edge)
        decoded_edge = self.deserializer.deserialize_bytes(encoded_edge)
        self.compare_edges(edge, decoded_edge)
Пример #11
0
 def setUp(self):
     """Load LabeledEdge.avsc and build a serializer/deserializer from it.

     The single loaded schema is kept on ``self.schema`` and is used as
     both arguments of the deserializer.
     """
     module_dir = os.path.dirname(os.path.realpath(__file__))
     edge_schema_path = module_dir + "/LabeledEdge.avsc"

     self.schema = Utils.load_schema(edge_schema_path)
     self.serializer = AvroGenericSerializer(self.schema)
     self.deserializer = AvroGenericDeserializer(self.schema, self.schema)
class TestCDMTypeParsing(unittest.TestCase):

    _KV_PAIRS = 5

    def setUp(self):
        schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/TCCDMDatum.avsc"
        self.reader_schema = Utils.load_schema(schema_file)
        self.writer_schema = Utils.load_schema(schema_file)
        self.serializer = AvroGenericSerializer(self.writer_schema)
        self.deserializer = AvroGenericDeserializer(self.reader_schema)

    def _run_record_type_test(self, generator, expected_value):
        parser = parsing.CDMParser(self.reader_schema)
        for i in range(20):
            record = generator.generate_random_record(
                TestCDMTypeParsing._KV_PAIRS)
            self.assertTrue(Utils.validate(self.writer_schema, record))
            self.assertTrue(parser.get_record_type(record) == expected_value)
            serialized = self.serializer.serialize_to_bytes(record)
            deserialized = self.deserializer.deserialize_bytes(serialized)
            self.assertTrue(
                parser.get_record_type(deserialized) == expected_value)

    def test_provenance_tag_node(self):
        generator = \
                record_generator.CDMProvenanceTagNodeGenerator(self.serializer)
        self._run_record_type_test(generator, "ProvenanceTagNode")

    def test_event(self):
        generator = record_generator.CDMEventGenerator(self.serializer)
        self._run_record_type_test(generator, "Event")

    def test_net_flow_object(self):
        generator = record_generator.CDMNetFlowObjectGenerator(self.serializer)
        self._run_record_type_test(generator, "NetFlowObject")

    def test_file_object(self):
        generator = record_generator.CDMFileObjectGenerator(self.serializer)
        self._run_record_type_test(generator, "FileObject")

    def test_src_sink_object(self):
        generator = record_generator.CDMSrcSinkObjectGenerator(self.serializer)
        self._run_record_type_test(generator, "SrcSinkObject")

    def test_ipc_object(self):
        generator = record_generator.CDMIpcObjectGenerator(self.serializer)
        self._run_record_type_test(generator, "IpcObject")

    def test_memory_object(self):
        generator = record_generator.CDMMemoryObjectGenerator(self.serializer)
        self._run_record_type_test(generator, "MemoryObject")

    def test_principal(self):
        generator = record_generator.CDMPrincipalGenerator(self.serializer)
        self._run_record_type_test(generator, "Principal")

    def test_time_marker(self):
        generator = record_generator.CDMTimeMarkerGenerator(self.serializer)
        self._run_record_type_test(generator, "TimeMarker")

    def test_unit_dependency_marker(self):
        generator = record_generator.CDMUnitDependencyGenerator(
            self.serializer)
        self._run_record_type_test(generator, "UnitDependency")

    def test_registry_key_object(self):
        generator = record_generator.CDMRegistryKeyObjectGenerator(
            self.serializer)
        self._run_record_type_test(generator, "RegistryKeyObject")

    def test_host(self):
        generator = record_generator.CDMHostGenerator(self.serializer)
        self._run_record_type_test(generator, "Host")

    def test_packet_socket_object(self):
        generator = record_generator.CDMPacketSocketObjectGenerator(
            self.serializer)
        self._run_record_type_test(generator, "PacketSocketObject")

    def test_end_marker(self):
        generator = record_generator.CDMEndMarkerGenerator(self.serializer)
        self._run_record_type_test(generator, "EndMarker")

    def test_unknown_provenance_node(self):
        generator = record_generator.CDMUnknownProvenanceNodeGenerator(
            self.serializer)
        self._run_record_type_test(generator, "UnknownProvenanceNode")