Пример #1
0
def read_json_input(input_data_path):
    """Read a JSON corpus and convert it into a DataSetProto.

    The input is expected to have this format:

    [{"URL": "http://example.com/path.html", "Contents": "Text content of webpage here"},
     {"URL": "http://example.com/path.html", "Contents": "Text content of webpage here"}]

    Each item becomes one sequence holding a single page view, whose only
    content entry lists the distinct code points found in the item's
    "Contents" string. The "URL" field is not used.

    Args:
      input_data_path: Path of the JSON file to read, or '-' to read the
        JSON text from standard input.

    Returns:
      A page_view_sequence_pb2.DataSetProto with one sequence per item.

    Raises:
      OSError: If the file cannot be opened.
      json.JSONDecodeError: If the input is not valid JSON.
      KeyError: If an item has no "Contents" key.
    """
    if input_data_path == '-':
        data = sys.stdin.read()
    else:
        # JSON interchange text is UTF-8; decode it explicitly instead of
        # relying on the platform's locale default.
        with open(input_data_path, 'r', encoding='utf-8') as input_json_file:
            data = input_json_file.read()
    corpus = json.loads(data)

    result = page_view_sequence_pb2.DataSetProto()
    for item in corpus:
        page_content_proto = page_view_sequence_pb2.PageContentProto()
        # De-duplicate through a set, then sort so the emitted code point
        # list is in a stable, ascending order.
        codepoints = {ord(char) for char in item["Contents"]}
        page_content_proto.codepoints.extend(sorted(codepoints))

        page_view_proto = page_view_sequence_pb2.PageViewProto()
        page_view_proto.contents.append(page_content_proto)

        page_view_sequence = page_view_sequence_pb2.PageViewSequenceProto()
        page_view_sequence.page_views.append(page_view_proto)

        result.sequences.append(page_view_sequence)
    return result
    def test_no_requests(self):
        """A page view with one empty content entry yields request-free graphs.

        The same page view is submitted to the session twice; two graphs
        are expected, each matching an empty list of expected requests.
        """
        empty_view = page_view_sequence_pb2.PageViewProto()
        empty_view.contents.add()

        for _ in range(2):
            self.session.page_view_proto(empty_view)

        graphs = self.session.get_request_graphs()
        self.assertEqual(len(graphs), 2)

        for index in range(2):
            matches = request_graph.graph_has_independent_requests(
                graphs[index], [])
            self.assertTrue(matches)
Пример #3
0
def sequence(views):
    """Build a list of PageViewProto messages from plain mappings.

    Args:
      views: Iterable of mappings from font name to an iterable of code
        points. Each mapping describes one page view; each of its entries
        becomes one PageContentProto inside that page view.

    Returns:
      A list with one PageViewProto per element of views, in order.
    """

    def build_page_view(codepoints_by_font):
        # One content entry per (font name, code points) pair.
        proto = page_view_sequence_pb2.PageViewProto()
        for name, points in codepoints_by_font.items():
            entry = page_view_sequence_pb2.PageContentProto()
            entry.font_name = name
            entry.codepoints.extend(points)
            proto.contents.append(entry)
        return proto

    return [build_page_view(view) for view in views]
Пример #4
0
def create_page_view(file_path):
    """Convert the text of a file into a single page view proto.

    The file is read as UTF-8 and the distinct code points of its
    characters are collected.

    Args:
      file_path: Path of the text file to read.

    Returns:
      A PageViewProto holding one content entry whose font name is
      "Roboto-Regular.ttf" and whose code points are the file's distinct
      code points in ascending order.
    """
    with open(file_path, encoding='utf-8') as text_file:
        unique_points = {ord(character) for character in text_file.read()}

    font_content = page_view_sequence_pb2.PageContentProto()
    font_content.font_name = "Roboto-Regular.ttf"
    font_content.codepoints.extend(sorted(unique_points))

    view = page_view_sequence_pb2.PageViewProto()
    view.contents.append(font_content)
    return view
    def test_multiple_chained_requests(self):
        """Requests logged on separate content entries share one graph.

        Two content entries are added to a single page view, each with
        one logged request. After submitting the page view once, exactly
        one graph is expected and it must match both (request_size,
        response_size) pairs.
        """
        size_pairs = [(12, 34), (56, 78)]

        page_view = page_view_sequence_pb2.PageViewProto()
        for request_size, response_size in size_pairs:
            # A fresh content entry for every logged request.
            entry = page_view.contents.add()
            logged = entry.logged_requests.add()
            logged.request_size = request_size
            logged.response_size = response_size

        self.session.page_view_proto(page_view)

        graphs = self.session.get_request_graphs()
        self.assertEqual(len(graphs), 1)

        matches = request_graph.graph_has_independent_requests(
            graphs[0], [(12, 34), (56, 78)])
        self.assertTrue(matches)
    def test_chained_requests(self):
        """Two requests logged on the same content entry form one graph.

        The single resulting graph must report a length of 2, and
        requests_that_can_run must return exactly one request when called
        with an empty set.
        """
        page_view = page_view_sequence_pb2.PageViewProto()
        entry = page_view.contents.add()
        for request_size, response_size in ((12, 34), (56, 78)):
            # Both requests are logged on the same content entry.
            logged = entry.logged_requests.add()
            logged.request_size = request_size
            logged.response_size = response_size

        self.session.page_view_proto(page_view)

        graphs = self.session.get_request_graphs()
        self.assertEqual(len(graphs), 1)

        only_graph = graphs[0]
        self.assertEqual(only_graph.length(), 2)
        runnable = only_graph.requests_that_can_run(set())
        self.assertEqual(len(runnable), 1)
    def test_single_requests(self):
        """A page view with one logged request yields a one-request graph.

        The same page view is submitted to the session twice; two graphs
        are expected, each matching the single (request_size,
        response_size) pair (123, 456).
        """
        page_view = page_view_sequence_pb2.PageViewProto()
        logged = page_view.contents.add().logged_requests.add()
        logged.request_size = 123
        logged.response_size = 456

        for _ in range(2):
            self.session.page_view_proto(page_view)

        graphs = self.session.get_request_graphs()
        self.assertEqual(len(graphs), 2)

        for index in range(2):
            matches = request_graph.graph_has_independent_requests(
                graphs[index], [(123, 456)])
            self.assertTrue(matches)