Example #1
 def test_attachment_parsing(self):
     bugzilla = Bugzilla()
     soup = BeautifulSoup(self._example_attachment)
     attachment_element = soup.find("attachment")
     attachment = bugzilla._parse_attachment_element(attachment_element, self._expected_example_attachment_parsing['bug_id'])
     self.assertTrue(attachment)
     self._assert_dictionaries_equal(attachment, self._expected_example_attachment_parsing)
Example #2
    def test_status_parsing(self):
        buildbot = BuildBot()

        soup = BeautifulSoup(self._example_one_box_status)
        status_table = soup.find("table")
        input_rows = status_table.findAll("tr")

        for x in range(len(input_rows)):
            status_row = input_rows[x]
            expected_parsing = self._expected_example_one_box_parsings[x]

            builder = buildbot._parse_builder_status_from_row(status_row)

            # Make sure we aren't parsing more or less than we expect
            self.assertEquals(builder.keys(), expected_parsing.keys())

            for key, expected_value in expected_parsing.items():
                self.assertEquals(
                    builder[key],
                    expected_value,
                    (
                        "Builder %d parse failure for key: %s: Actual='%s' Expected='%s'"
                        % (x, key, builder[key], expected_value)
                    ),
                )
Example #3
    def _parse_attachment_ids_request_query(self, page, since=None):
        # Formats
        digits = re.compile("\d+")
        attachment_href = re.compile("attachment.cgi\?id=\d+&action=review")
        # if no date is given, return all ids
        if not since:
            attachment_links = SoupStrainer("a", href=attachment_href)
            return [int(digits.search(tag["href"]).group(0))
                for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)]

        # Parse the main table only
        date_format = re.compile("\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
        mtab = SoupStrainer("table", {"class": "requests"})
        soup = BeautifulSoup(page, parseOnlyThese=mtab)
        patch_ids = []

        for row in soup.findAll("tr"):
            patch_tag = row.find("a", {"href": attachment_href})
            if not patch_tag:
                continue
            patch_id = int(digits.search(patch_tag["href"]).group(0))
            date_tag = row.find("td", text=date_format)
            if date_tag and datetime.strptime(date_format.search(date_tag).group(0), "%Y-%m-%d %H:%M") < since:
                _log.info("Patch is old: %d (%s)" % (patch_id, date_tag))
                continue
            patch_ids.append(patch_id)
        return patch_ids
Example #4
    def test_convert_vendor_prefix_js_paths(self):
        test_html = """<head>
<script src="/common/vendor-prefix.js">
</head>
"""
        fake_dir_path = self.fake_dir_path('adapterjspaths')
        converter = _W3CTestConverter(fake_dir_path, DUMMY_FILENAME)

        oc = OutputCapture()
        oc.capture_output()
        try:
            converter.feed(test_html)
            converter.close()
            converted = converter.output()
        finally:
            oc.restore_output()

        new_html = BeautifulSoup(converted[1])

        # Verify the original paths are gone, and the new paths are present.
        orig_path_pattern = re.compile('\"/common/vendor-prefix.js')
        self.assertEquals(len(new_html.findAll(src=orig_path_pattern)), 0, 'vendor-prefix.js path was not converted')

        resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")
        new_relpath = os.path.relpath(resources_dir, fake_dir_path)
        relpath_pattern = re.compile(new_relpath)
        self.assertEquals(len(new_html.findAll(src=relpath_pattern)), 1, 'vendor-prefix.js relative path not correct')
Example #5
 def _parse_result_count(self, results_page):
     result_count_text = BeautifulSoup(results_page).find(attrs={'class': 'bz_result_count'}).string
     result_count_parts = result_count_text.strip().split(" ")
     if result_count_parts[0] == "Zarro":
         return 0
     if result_count_parts[0] == "One":
         return 1
     return int(result_count_parts[0])
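For readers unfamiliar with Bugzilla's search footer: it spells out zero and one in words ("Zarro Boogs found.", "One bug found."), which is why the parser special-cases those strings. A minimal sketch of the same lookup, using the BeautifulSoup 3 API seen throughout these examples and a made-up page fragment:

    from BeautifulSoup import BeautifulSoup

    # Hypothetical Bugzilla search footer, for illustration only.
    page = '<span class="bz_result_count">Zarro Boogs found.</span>'
    count_text = BeautifulSoup(page).find(attrs={'class': 'bz_result_count'}).string
    print count_text.strip().split(" ")[0]  # -> u'Zarro', which maps to a count of 0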
Example #6
    def test_failures_from_fail_row(self):
        row = BeautifulSoup("<tr><td><a>test.hml</a></td><td><a>expected image</a></td><td><a>25%</a></td></tr>")
        test_name = unicode(row.find("a").string)
        # Even if the caller has already found the test name, findAll inside _failures_from_fail_row will see it again.
        failures = OutputCapture().assert_outputs(self, ORWTResultsHTMLParser._failures_from_fail_row, [row])
        self.assertEqual(len(failures), 1)
        self.assertEqual(type(sorted(failures)[0]), test_failures.FailureImageHashMismatch)

        row = BeautifulSoup("<tr><td><a>test.hml</a><a>foo</a></td></tr>")
        expected_stderr = "Unhandled link text in results.html parsing: foo.  Please file a bug against webkitpy.\n"
        OutputCapture().assert_outputs(self, ORWTResultsHTMLParser._failures_from_fail_row, [row], expected_stderr=expected_stderr)
Example #7
 def _revisions_for_builder(self, builder):
     soup = BeautifulSoup(self._fetch_builder_page(builder))
     revisions = []
     for status_row in soup.find('table').findAll('tr'):
         revision_anchor = status_row.find('a')
         table_cells = status_row.findAll('td')
         if not table_cells or len(table_cells) < 3 or not table_cells[2].string:
             continue
         if revision_anchor and revision_anchor.string and re.match(r'^\d+$', revision_anchor.string):
             revisions.append((int(revision_anchor.string), 'success' in table_cells[2].string))
     return revisions
Example #8
    def verify_test_harness_paths(self, converter, converted, test_path, num_src_paths, num_href_paths):
        if isinstance(converted, basestring):
            converted = BeautifulSoup(converted)

        resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")

        # Verify the original paths are gone, and the new paths are present.
        orig_path_pattern = re.compile('\"/resources/testharness')
        self.assertEquals(len(converted.findAll(src=orig_path_pattern)), 0, 'testharness src path was not converted')
        self.assertEquals(len(converted.findAll(href=orig_path_pattern)), 0, 'testharness href path was not converted')

        new_relpath = os.path.relpath(resources_dir, test_path)
        relpath_pattern = re.compile(new_relpath)
        self.assertEquals(len(converted.findAll(src=relpath_pattern)), num_src_paths, 'testharness src relative path not correct')
        self.assertEquals(len(converted.findAll(href=relpath_pattern)), num_href_paths, 'testharness href relative path not correct')
Example #9
 def _parse_bugs_from_xml(self, page):
     soup = BeautifulSoup(page)
     # Without the unicode() call, BeautifulSoup occasionally complains of being
     # passed None for no apparent reason.
     return [
         Bug(self._parse_bug_dictionary_from_xml(unicode(bug_xml)), self)
         for bug_xml in soup('bug')
     ]
Example #10
 def _parse_attachment_ids_request_query(self, page):
     digits = re.compile("\d+")
     attachment_href = re.compile("attachment.cgi\?id=\d+&action=review")
     attachment_links = SoupStrainer("a", href=attachment_href)
     return [
         int(digits.search(tag["href"]).group(0))
         for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
     ]
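A rough, self-contained sketch of the SoupStrainer pattern above, assuming BeautifulSoup 3's parseOnlyThese keyword and a hypothetical review-queue fragment; only anchors whose href matches the attachment pattern are parsed into the tree at all:

    import re
    from BeautifulSoup import BeautifulSoup, SoupStrainer

    # Hypothetical review-queue markup, for illustration only.
    page = ('<a href="attachment.cgi?id=12345&action=review">patch</a>'
            '<a href="show_bug.cgi?id=99999">bug</a>')

    attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")
    attachment_links = SoupStrainer("a", href=attachment_href)
    # Iterating the strained soup yields only the matching <a> tags.
    print [int(re.search(r"\d+", tag["href"]).group(0))
           for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)]  # -> [12345]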
Example #11
 def _fetch_bug_ids_advanced_query(self, query):
     soup = BeautifulSoup(self._load_query(query))
     # The contents of the <a> inside the cells in the first column happen
     # to be the bug id.
     return [
         int(bug_link_cell.find("a").string)
         for bug_link_cell in soup('td', "first-child")
     ]
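In BeautifulSoup 3, calling a soup or tag object, as in soup('td', "first-child") above, is shorthand for findAll(), and a bare string in the attrs position matches the CSS class. A small sketch with a made-up query-result row:

    from BeautifulSoup import BeautifulSoup

    # Hypothetical Bugzilla query result row, for illustration only.
    page = '<tr><td class="first-child"><a href="#">12345</a></td><td>Some bug</td></tr>'
    soup = BeautifulSoup(page)
    print [int(cell.find("a").string) for cell in soup('td', "first-child")]  # -> [12345]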
Example #12
 def convert_html(self, new_path, contents, filename):
     doc = BeautifulSoup(contents)
     did_modify_paths = self.convert_testharness_paths(
         doc, new_path, filename)
     converted_properties_and_content = self.convert_prefixed_properties(
         doc, filename)
     return converted_properties_and_content if (
         did_modify_paths or converted_properties_and_content[0]) else None
Example #13
    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if (ref_file == self.filename):
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {'test': self.filename, 'reference': self.potential_ref_filename()}
            test_info['reference_support_info'] = {}
        # we check for wpt manual test before checking for jstest, as some WPT manual tests can be classified as CSS JS tests
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        return test_info
Example #14
    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest

        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(
                    self.filename)

            ref_file = self.filesystem.join(
                self.filesystem.dirname(self.filename), matches[0]['href'])
            if self.ref_doc is None:
                self.ref_doc = self.load_file(ref_file)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]['href'].startswith('..'):
                support_files = self.support_files(self.ref_doc)
                test_info['refsupport'] = support_files

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and not (
                '-ref' in self.filename) and not ('reference'
                                                  in self.filename):
            test_info = {'test': self.filename}

        return test_info
Example #15
 def _parse_bug_id_from_attachment_page(self, page):
     # The "Up" relation happens to point to the bug.
     up_link = BeautifulSoup(page).find('link', rel='Up')
     if not up_link:
         # This attachment does not exist (or you don't have permissions to
         # view it).
         return None
     match = re.search("show_bug.cgi\?id=(?P<bug_id>\d+)", up_link['href'])
     return int(match.group('bug_id'))
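A minimal sketch of the same lookup against a hypothetical attachment page header; per the comment above, the <link rel="Up"> element points back at the owning bug:

    import re
    from BeautifulSoup import BeautifulSoup

    # Hypothetical attachment page header, for illustration only.
    page = '<html><head><link rel="Up" href="show_bug.cgi?id=54321"></head></html>'
    up_link = BeautifulSoup(page).find('link', rel='Up')
    print int(re.search(r"show_bug.cgi\?id=(?P<bug_id>\d+)", up_link['href']).group('bug_id'))  # -> 54321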
Example #16
    def test_failures_from_fail_row(self):
        row = BeautifulSoup(
            "<tr><td><a>test.hml</a></td><td><a>expected image</a></td><td><a>25%</a></td></tr>"
        )
        test_name = unicode(row.find("a").string)
        # Even if the caller has already found the test name, findAll inside _failures_from_fail_row will see it again.
        failures = OutputCapture().assert_outputs(
            self, ORWTResultsHTMLParser._failures_from_fail_row, [row])
        self.assertEqual(len(failures), 1)
        self.assertEqual(type(sorted(failures)[0]),
                         test_failures.FailureImageHashMismatch)

        row = BeautifulSoup("<tr><td><a>test.hml</a><a>foo</a></td></tr>")
        expected_stderr = "Unhandled link text in results.html parsing: foo.  Please file a bug against webkitpy.\n"
        OutputCapture().assert_outputs(
            self,
            ORWTResultsHTMLParser._failures_from_fail_row, [row],
            expected_stderr=expected_stderr)
Example #17
    def test_status_parsing(self):
        buildbot = BuildBot()

        soup = BeautifulSoup(self._example_one_box_status)
        status_table = soup.find("table")
        input_rows = status_table.findAll('tr')

        for x in range(len(input_rows)):
            status_row = input_rows[x]
            expected_parsing = self._expected_example_one_box_parsings[x]

            builder = buildbot._parse_builder_status_from_row(status_row)

            # Make sure we aren't parsing more or less than we expect
            self.assertEquals(builder.keys(), expected_parsing.keys())

            for key, expected_value in expected_parsing.items():
                self.assertEquals(builder[key], expected_value, ("Builder %d parse failure for key: %s: Actual='%s' Expected='%s'" % (x, key, builder[key], expected_value)))
Example #18
    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

        Returns: A dict which can have the properties:
            "test": test file name.
            "reference": related reference test file name if this is a reference test.
            "reference_support_info": extra information about the related reference test and any support files.
            "jstest": A boolean, whether this is a JS test.
            If the given contents are empty, then None is returned.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = BeautifulSoup(test_contents)

        if ref_contents is not None:
            self.ref_doc = BeautifulSoup(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files.
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                        self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] and '-ref' not in self.filename and 'reference' not in self.filename:
            test_info = {'test': self.filename}

        return test_info
Example #19
 def _parse_result_count(self, results_page):
     result_count_text = BeautifulSoup(results_page).find(attrs={'class': 'bz_result_count'})
     if result_count_text is None or result_count_text.string is None:
         _log.warn("BeautifulSoup returned None while finding class: bz_result_count in:\n{}".format(results_page))
         return 0
     result_count_parts = result_count_text.string.strip().split(" ")
     if result_count_parts[0] == "Zarro":
         return 0
     if result_count_parts[0] == "One":
         return 1
     return int(result_count_parts[0])
Example #20
 def _parse_bug_id_from_attachment_page(self, page):
     # The "Up" relation happens to point to the bug.
     title = BeautifulSoup(page).find('div', attrs={'id': 'bug_title'})
     if not title:
         _log.warning("This attachment does not exist (or you don't have permissions to view it).")
         return None
     match = re.search(r"show_bug.cgi\?id=(?P<bug_id>\d+)", str(title))
     if not match:
         _log.warning("Unable to parse bug id from attachment")
         return None
     return int(match.group('bug_id'))
Example #21
    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest

        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(self.filename)

            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            if self.ref_doc is None:
                self.ref_doc = self.load_file(ref_file)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]['href'].startswith('..'):
                support_files = self.support_files(self.ref_doc)
                test_info['refsupport'] = support_files

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename):
            test_info = {'test': self.filename}

        return test_info
Example #22
 def _parse_bug_dictionary_from_xml(self, page):
     soup = BeautifulSoup(page)
     bug = {}
     bug["id"] = int(soup.find("bug_id").string)
     bug["title"] = self._string_contents(soup.find("short_desc"))
     bug["bug_status"] = self._string_contents(soup.find("bug_status"))
     dup_id = soup.find("dup_id")
     if dup_id:
         bug["dup_id"] = self._string_contents(dup_id)
     bug["reporter_email"] = self._string_contents(soup.find("reporter"))
     bug["assigned_to_email"] = self._string_contents(soup.find("assigned_to"))
     bug["cc_emails"] = [self._string_contents(element) for element in soup.findAll('cc')]
     bug["attachments"] = [self._parse_attachment_element(element, bug["id"]) for element in soup.findAll('attachment')]
     return bug
Example #23
 def _parse_mks_response_for_mks_id(response):
     # Part of the response may be encoded as HTML entities. We need to
     # decode such entities so as to retrieve the text of <value>
     decoded = BeautifulSoup(response,
                             convertEntities=BeautifulSoup.XML_ENTITIES)
     soup = BeautifulSoup(decoded.encode("UTF-8"))
     return int(soup.find("value").string)
Example #24
 def _parse_bug_page(self, page):
     soup = BeautifulSoup(page)
     bug = {}
     bug["id"] = int(soup.find("bug_id").string)
     bug["title"] = self._string_contents(soup.find("short_desc"))
     bug["reporter_email"] = self._string_contents(soup.find("reporter"))
     bug["assigned_to_email"] = self._string_contents(soup.find("assigned_to"))
     bug["cc_emails"] = [self._string_contents(element)
                         for element in soup.findAll('cc')]
     bug["attachments"] = [self._parse_attachment_element(element, bug["id"]) for element in soup.findAll('attachment')]
     return bug
Example #25
    def _parse_results_html(cls, page):
        parsed_results = {}
        tables = BeautifulSoup(page).findAll("table")
        for table in tables:
            table_title = unicode(table.findPreviousSibling("p").string)
            if table_title not in cls.expected_keys:
                # This Exception should only ever be hit if run-webkit-tests changes its results.html format.
                raise Exception("Unhandled title: %s" % table_title)
            # We might want to translate table titles into identifiers before storing.
            parsed_results[table_title] = [
                unicode(row.find("a").string) for row in table.findAll("tr")
            ]

        return parsed_results
Example #26
 def load_file(self, filename):
     if self.filesystem.isfile(filename):
         try:
             self.test_doc = Parser(self.filesystem.read_binary_file(filename))
         except:
             # FIXME: Figure out what to do if we can't parse the file.
             _log.error("Failed to parse %s", filename)
             self.test_doc = None
     else:
         if self.filesystem.isdir(filename):
             # FIXME: Figure out what is triggering this and what to do about it.
             _log.error("Trying to load %s, which is a directory", filename)
         self.test_doc = None
     self.ref_doc = None
Example #27
    def user_dict_from_edit_user_page(self, page):
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        user_table = soup.find("table", {'class': 'main'})
        user_dict = {}
        for row in user_table('tr'):
            label_element = row.find('label')
            if not label_element:
                continue  # This must not be a row we know how to parse.
            if row.find('table'):
                continue  # Skip the <tr> holding the groups table.

            key = label_element['for']
            if "group" in key:
                key = "groups"
                value = user_dict.get('groups', set())
                # We must be parsing a "tr" inside the inner group table.
                (group_name, _) = self._group_name_and_string_from_row(row)
                if row.find('input', {'type': 'checkbox', 'checked': 'checked'}):
                    value.add(group_name)
            else:
                value = unicode(row.find('td').string).strip()
            user_dict[key] = value
        return user_dict
Example #28
    def user_dict_from_edit_user_page(self, page):
        soup = BeautifulSoup(page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        user_table = soup.find("table", {'class': 'main'})
        user_dict = {}
        for row in user_table('tr'):
            label_element = row.find('label')
            if not label_element:
                continue  # This must not be a row we know how to parse.
            if row.find('table'):
                continue  # Skip the <tr> holding the groups table.

            key = label_element['for']
            if "group" in key:
                key = "groups"
                value = user_dict.get('groups', set())
                # We must be parsing a "tr" inside the inner group table.
                (group_name, _) = self._group_name_and_string_from_row(row)
                if row.find('input', {'type': 'checkbox', 'checked': 'checked'}):
                    value.add(group_name)
            else:
                value = unicode(row.find('td').string).strip()
            user_dict[key] = value
        return user_dict
Example #29
    def user_dict_from_edit_user_page(self, page):
        soup = BeautifulSoup(page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        user_table = soup.find("table", {"class": "main"})
        user_dict = {}
        for row in user_table("tr"):
            label_element = row.find("label")
            if not label_element:
                continue  # This must not be a row we know how to parse.
            if row.find("table"):
                continue  # Skip the <tr> holding the groups table.

            key = label_element["for"]
            if "group" in key:
                key = "groups"
                value = user_dict.get("groups", set())
                # We must be parsing a "tr" inside the inner group table.
                (group_name, _) = self._group_name_and_string_from_row(row)
                if row.find("input", {"type": "checkbox", "checked": "checked"}):
                    value.add(group_name)
            else:
                value = unicode(row.find("td").string).strip()
            user_dict[key] = value
        return user_dict
Example #30
    def _check_create_bug_response(self, response_html):
        response_html = string_utils.decode(response_html, target_type=str)
        match = re.search(r'<title>Bug (?P<bug_id>\d+) Submitted[^<]*</title>', response_html)
        if match:
            return match.group('bug_id')

        match = re.search(
            '<div id="bugzilla-body">(?P<error_message>.+)<div id="footer">',
            response_html,
            re.DOTALL)
        error_message = "FAIL"
        if match:
            text_lines = BeautifulSoup(match.group('error_message')).findAll(text=True)
            error_message = "\n" + '\n'.join(["  " + line.strip() for line in text_lines if line.strip()])
        raise Exception("Bug not created: {}".format(error_message))
Example #31
 def load_file(self, filename):
     if self.filesystem.isfile(filename):
         try:
             self.test_doc = Parser(
                 self.filesystem.read_binary_file(filename))
         except:
             # FIXME: Figure out what to do if we can't parse the file.
             _log.error("Failed to parse %s", filename)
             self.test_doc = None
     else:
         if self.filesystem.isdir(filename):
             # FIXME: Figure out what is triggering this and what to do about it.
             _log.error("Trying to load %s, which is a directory", filename)
         self.test_doc = None
     self.ref_doc = None
Example #32
 def _parse_bug_page(self, page):
     soup = BeautifulSoup(page)
     bug = {}
     bug["id"] = int(soup.find("bug_id").string)
     bug["title"] = unicode(soup.find("short_desc").string)
     bug["reporter_email"] = str(soup.find("reporter").string)
     bug["assigned_to_email"] = str(soup.find("assigned_to").string)
     bug["cc_emails"] = [str(element.string)
                         for element in soup.findAll('cc')]
     bug["attachments"] = [self._parse_attachment_element(element, bug["id"]) for element in soup.findAll('attachment')]
     return bug
Example #33
    def verify_test_harness_paths(self, converter, converted, test_path, num_src_paths, num_href_paths):
        if isinstance(converted, basestring):
            converted = BeautifulSoup(converted)

        resources_dir = converter.path_from_webkit_root("LayoutTests", "resources")

        # Verify the original paths are gone, and the new paths are present.
        orig_path_pattern = re.compile('\"/resources/testharness')
        self.assertEquals(len(converted.findAll(src=orig_path_pattern)), 0, 'testharness src path was not converted')
        self.assertEquals(len(converted.findAll(href=orig_path_pattern)), 0, 'testharness href path was not converted')

        new_relpath = os.path.relpath(resources_dir, test_path)
        relpath_pattern = re.compile(new_relpath)
        self.assertEquals(len(converted.findAll(src=relpath_pattern)), num_src_paths, 'testharness src relative path not correct')
        self.assertEquals(len(converted.findAll(href=relpath_pattern)), num_href_paths, 'testharness href relative path not correct')
Example #34
    def load_file(self, filename, is_ref=False):
        if self.filesystem.isfile(filename):
            try:
                doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
            except IOError:
                _log.error("IOError: Failed to read %s", filename)
                doc = None
            except HTMLParser.HTMLParseError:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("HTMLParseError: Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc
Example #35
    def test_convert_test_harness_paths(self):
        """ Tests convert_testharness_paths() with a test that uses all three testharness files """

        test_html = """<head>
<link href="/resources/testharness.css" rel="stylesheet" type="text/css">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
</head>
"""
        converter = W3CTestConverter()

        fake_dir_path = self.fake_dir_path(converter, 'testharnesspaths')

        doc = BeautifulSoup(test_html)
        oc = OutputCapture()
        oc.capture_output()
        try:
            converted = converter.convert_testharness_paths(doc, fake_dir_path, DUMMY_FILENAME)
        finally:
            oc.restore_output()

        self.verify_conversion_happened(converted)
        self.verify_test_harness_paths(converter, doc, fake_dir_path, 2, 1)
Example #36
 def login_userid_pairs_from_edit_user_results(self, results_page):
     soup = BeautifulSoup(results_page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
     results_table = soup.find(id="admin_table")
     login_userid_pairs = [self._login_and_uid_from_row(row) for row in results_table('tr')]
     # Filter out None from the logins.
     return filter(lambda pair: bool(pair), login_userid_pairs)
Example #37
 def load_file(self, filename):
     if self.filesystem.exists(filename):
         self.test_doc = Parser(self.filesystem.read_text_file(filename))
     else:
         self.test_doc = None
     self.ref_doc = None
Example #38
class TestParser(object):
    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename):
        if self.filesystem.isfile(filename):
            try:
                self.test_doc = Parser(self.filesystem.read_binary_file(filename))
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                self.test_doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            self.test_doc = None
        self.ref_doc = None

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest

        matches = self.reference_links_of_type("match") + self.reference_links_of_type("mismatch")
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    "Multiple references are not supported. Importing the first ref defined in %s",
                    self.filesystem.basename(self.filename),
                )

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]["href"])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.ref_doc = self.load_file(ref_file)

            test_info = {"test": self.filename, "reference": ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]["href"].startswith(".."):
                support_files = self.support_files(self.ref_doc)
                test_info["refsupport"] = support_files

        elif self.is_jstest():
            test_info = {"test": self.filename, "jstest": True}
        elif self.options["all"] is True and not ("-ref" in self.filename) and not ("reference" in self.filename):
            test_info = {"test": self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile("['\"/]?/resources/testharness")))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile(".*"))
        elements_with_href_attributes = doc.findAll(href=re.compile(".*"))

        url_pattern = re.compile("url\(.*\)")
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub("url\(['\"]?", "", url.group(0))
            url = re.sub("['\"]?\)", "", url)
            urls.append(url)

        src_paths = [src_tag["src"] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag["href"] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not (path.startswith("http:")) and not (path.startswith("mailto:")):
                support_files.append(path)

        return support_files
Example #39
class TestParser(object):

    def __init__(self, filename, host):
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        if self.filesystem.isfile(filename):
            try:
                doc = BeautifulSoup(self.filesystem.read_binary_file(filename))
            except IOError:
                _log.error("IOError: Failed to read %s", filename)
                doc = None
            except HTMLParser.HTMLParseError:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("HTMLParseError: Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires.

        Returns: A dict which can have the properties:
            "test": test file name.
            "reference": related reference test file name if this is a reference test.
            "reference_support_info": extra information about the related reference test and any support files.
            "jstest": A boolean, whether this is a JS test.
            If the path doesn't look like a test or the given contents are empty,
            then None is returned.
        """
        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = BeautifulSoup(test_contents)

        if ref_contents is not None:
            self.ref_doc = BeautifulSoup(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files.
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                        self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}

        elif 'csswg-test' in self.filename:
            # In csswg-test, all other files should be manual tests.
            # This function isn't called for non-test files in support/.
            test_info = {'test': self.filename}

        elif '-manual.' in self.filesystem.basename(self.filename):
            # WPT has a naming convention for manual tests.
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Searches the file for all paths specified in url()s or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile(r'url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub(r'url\([\'\"]?', '', url.group(0))
            url = re.sub(r'[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not path.startswith('http:') and not path.startswith('mailto:'):
                uri_scheme_pattern = re.compile(r'[A-Za-z][A-Za-z+.-]*:')
                if not uri_scheme_pattern.match(path):
                    support_files.append(path)

        return support_files
Example #40
 def _parse_mks_response_for_mks_id(response):
     # Part of the response may be encoded as HTML entities. We need to
     # decode such entities so as to retrieve the text of <value>
     decoded = BeautifulSoup(response, convertEntities=BeautifulSoup.XML_ENTITIES)
     soup = BeautifulSoup(decoded.encode("UTF-8"))
     return int(soup.find("value").string)
Example #41
 def builder_statuses(self):
     soup = BeautifulSoup(self._fetch_one_box_per_builder())
     return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]
Example #42
 def _parse_bug_title_from_attachment_page(self, page):
     return BeautifulSoup(page).find('div', attrs={'id': 'bug_title'})
Example #43
 def _parse_logins_from_editusers_results(self, results_page):
     soup = BeautifulSoup(results_page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
     results_table = soup.find(id="admin_table")
     logins = [self._login_from_row(row) for row in results_table('tr')]
     # Filter out None from the logins.
     return filter(lambda login: bool(login), logins)
Example #44
class TestParser(object):

    def __init__(self, options, filename, host=Host()):
        self.options = options
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if (ref_file == self.filename):
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {'test': self.filename, 'reference': self.potential_ref_filename()}
            test_info['reference_support_info'] = {}
        # we check for wpt manual test before checking for jstest, as some WPT manual tests can be classified as CSS JS tests
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def is_wpt_manualtest(self):
        """Returns whether the test is a manual test according WPT rules (i.e. file ends with -manual.htm path)."""
        return self.filename.endswith('-manual.htm') or self.filename.endswith('-manual.html')

    def is_slow_test(self):
        return any([match.name == 'meta' and match['name'] == 'timeout' for match in self.test_doc.findAll(content='long')])

    def potential_ref_filename(self):
        parts = self.filesystem.splitext(self.filename)
        return parts[0] + '-ref' + parts[1]

    def is_wpt_reftest(self):
        """Returns whether the test is a ref test according WPT rules (i.e. file has a -ref.html counterpart)."""
        parts = self.filesystem.splitext(self.filename)
        return  self.filesystem.isfile(self.potential_ref_filename())

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
Example #45
 def _parse_quips(self, page):
     soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
     quips = soup.find(text=re.compile(r"Existing quips:")).findNext("ul").findAll("li")
     return [unicode(quip_entry.string) for quip_entry in quips]
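The find(text=...) / findNext() chain above hops from a matching text node to the list that follows it; a small sketch with made-up quips markup:

    import re
    from BeautifulSoup import BeautifulSoup

    # Hypothetical quips page fragment, for illustration only.
    page = "<p>Existing quips:</p><ul><li>first quip</li><li>second quip</li></ul>"
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    quips = soup.find(text=re.compile(r"Existing quips:")).findNext("ul").findAll("li")
    print [unicode(quip.string) for quip in quips]  # -> [u'first quip', u'second quip']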
Example #46
class TestParser(object):
    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    'Multiple references are not supported. Importing the first ref defined in %s',
                    self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(
                    self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"',
                           self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                    self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(
                            ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files
                    }

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and '-ref' not in self.filename and 'reference' not in self.filename:
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(
            self.test_doc.find(
                src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [
            src_tag['src'] for src_tag in elements_with_src_attributes
        ]
        href_paths = [
            href_tag['href'] for href_tag in elements_with_href_attributes
        ]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not (path.startswith('http:')) and not (
                    path.startswith('mailto:')):
                uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
                if not uri_scheme_pattern.match(path):
                    support_files.append(path)

        return support_files
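
A small illustration of the reference_relpath computation above, using os.path in place of the webkitpy filesystem object; the two paths are hypothetical:

import os

test_file = "css/test/background-position-001.html"
ref_file = "css/reference/filled-green-100px-square.html"

# Relative path from the reference's directory back to the test's directory,
# with a trailing separator, mirroring reference_relpath above.
reference_relpath = os.path.relpath(os.path.dirname(test_file), os.path.dirname(ref_file)) + os.sep
print(reference_relpath)  # ../test/ on POSIX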
Exemplo n.º 47
0
class TestParser(object):

    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        if self.filesystem.exists(filename):
            doc = Parser(self.filesystem.read_text_file(filename))
        else:
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest

        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                print 'Warning: Webkit does not support multiple references. Importing the first ref defined in ' + self.filesystem.basename(self.filename)

            ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]['href'].startswith('..'):
                support_files = self.support_files(self.ref_doc)
                test_info['refsupport'] = support_files

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename):
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not(path.startswith('http:')) and not(path.startswith('mailto:')):
                support_files.append(path)

        return support_files
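
A minimal sketch of the url() extraction above, applied to a hand-written style string instead of a parsed document; the pattern is made non-greedy here (an assumption for this sketch) so that two url()s in one text node are picked up separately:

import re

# Non-greedy variant of the url() pattern above.
url_pattern = re.compile(r"url\(.*?\)")

style_text = "body { background: url('support/bg.png') } .x { cursor: url(help.cur) }"

urls = []
for match in url_pattern.findall(style_text):
    # Strip the url( prefix and the ) suffix, with or without quotes.
    path = re.sub(r"url\(['\"]?", "", match)
    path = re.sub(r"['\"]?\)", "", path)
    urls.append(path)

print(urls)  # ['support/bg.png', 'help.cur']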
Exemplo n.º 48
0
    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    'Multiple references are not supported. Importing the first ref defined in %s',
                    self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(
                    self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"',
                           self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                    self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(
                            ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files
                    }

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and '-ref' not in self.filename and 'reference' not in self.filename:
            test_info = {'test': self.filename}

        return test_info
Exemplo n.º 49
0
class TestParser(object):

    def __init__(self, options, filename):
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                             self.filesystem.basename(self.filename))

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]['href'])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(self.filesystem.dirname(
                        self.filename), self.filesystem.dirname(ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {'reference_relpath': reference_relpath, 'files': reference_support_files}

        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif self.options['all'] is True and not('-ref' in self.filename) and not('reference' in self.filename):
            test_info = {'test': self.filename}

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            url = re.search(url_pattern, url)
            url = re.sub('url\([\'\"]?', '', url.group(0))
            url = re.sub('[\'\"]?\)', '', url)
            urls.append(url)

        src_paths = [src_tag['src'] for src_tag in elements_with_src_attributes]
        href_paths = [href_tag['href'] for href_tag in elements_with_href_attributes]

        paths = src_paths + href_paths + urls
        for path in paths:
            if not(path.startswith('http:')) and not(path.startswith('mailto:')):
                uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
                if not uri_scheme_pattern.match(path):
                    support_files.append(path)

        return support_files
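
A hedged sketch of the rel='match' lookup and the jstest check above, run on made-up markup with BeautifulSoup standing in for the Parser wrapper used by these classes (the import is an assumption):

import re
from BeautifulSoup import BeautifulSoup  # legacy BeautifulSoup 3 import (assumption)

sample_test = """
<html><head>
<link rel="match" href="green-ref.html">
<script src="/resources/testharness.js"></script>
</head><body></body></html>
"""

doc = BeautifulSoup(sample_test)

# Reference links, as in reference_links_of_type above.
matches = doc.findAll(rel="match") + doc.findAll(rel="mismatch")
print([link["href"] for link in matches])  # [u'green-ref.html']

# W3C-style testharness path in a src attribute, as in is_jstest above.
print(bool(doc.find(src=re.compile('[\'\"/]?/resources/testharness'))))  # True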
Exemplo n.º 50
0
 def _parse_twisted_directory_listing(self, page):
     soup = BeautifulSoup(page)
     # HACK: Match only table rows with a class to ignore twisted header/footer rows.
     file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
     return [self._parse_twisted_file_row(file_row) for file_row in file_rows]
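
A hedged sketch of the row filtering above against a hand-written stand-in for a twisted directory listing (the markup is illustrative, not real twisted output; the import assumes the legacy BeautifulSoup 3 package):

import re
from BeautifulSoup import BeautifulSoup  # legacy BeautifulSoup 3 import (assumption)

sample_listing = """
<table>
<tr><th>Filename</th><th>Size</th></tr>
<tr class="odd-file"><td>r12345.zip</td><td>10M</td></tr>
<tr class="even-directory"><td>old/</td><td></td></tr>
</table>
"""

soup = BeautifulSoup(sample_listing)
# Only rows whose class mentions "directory" or "file" survive; the header row has no class.
file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
print([row.td.string for row in file_rows])  # [u'r12345.zip', u'old/']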
Exemplo n.º 52
0
 def parse_results_html(cls, page):
     tables = BeautifulSoup(page).findAll("table")
     return sum([cls._parse_results_table(table) for table in tables], [])
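
The sum(..., []) call above just concatenates the per-table result lists into one flat list; a minimal illustration with hypothetical per-table results:

# Each _parse_results_table(table) call is assumed to return a list of result entries.
per_table_results = [[{'test': 'a.html'}], [{'test': 'b.html'}, {'test': 'c.html'}]]
print(sum(per_table_results, []))  # [{'test': 'a.html'}, {'test': 'b.html'}, {'test': 'c.html'}]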
Exemplo n.º 53
0
class TestParser(object):
    def __init__(self,
                 options,
                 filename,
                 host=Host(),
                 source_root_directory=None):
        self.options = options
        self.filename = filename
        self.host = host
        self.filesystem = self.host.filesystem
        self.source_root_directory = source_root_directory

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        else:
            if self.filesystem.isdir(filename):
                # FIXME: Figure out what is triggering this and what to do about it.
                _log.error("Trying to load %s, which is a directory", filename)
            doc = None

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc

    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    'Multiple references are not supported. Importing the first ref defined in %s',
                    self.filesystem.basename(self.filename))

            try:
                href_match_file = matches[0]['href'].strip()
                if href_match_file.startswith('/'):
                    ref_file = self.filesystem.join(
                        self.source_root_directory,
                        href_match_file.lstrip('/'))
                else:
                    ref_file = self.filesystem.join(
                        self.filesystem.dirname(self.filename),
                        href_match_file)
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"',
                           self.filesystem)
                return None

            if (ref_file == self.filename):
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                    self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(
                            ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files
                    }

        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {
                'test': self.filename,
                'reference': self.potential_ref_filename()
            }
            test_info['reference_support_info'] = {}
        # We check for WPT manual tests before jstests, as some WPT manual tests could otherwise be classified as CSS JS tests.
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info

    def reference_links_of_type(self, reftest_type):
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(
            self.test_doc.find(
                src=re.compile('[\'\"/]?/resources/testharness')))

    def is_wpt_manualtest(self):
        """Returns whether the test is a manual test according WPT rules."""
        # General rule for manual test i.e. file ends with -manual.htm path
        # See https://web-platform-tests.org/writing-tests/manual.html#requirements-for-a-manual-test
        if self.filename.find('-manual.') != -1:
            return True

        # Rule specific to CSS WG manual tests i.e. rely on <meta name="flags">
        # See https://web-platform-tests.org/writing-tests/css-metadata.html#requirement-flags
        # For further details and discussions, see the following links:
        # https://github.com/web-platform-tests/wpt/issues/5381
        # https://github.com/web-platform-tests/wpt/issues/5293
        for match in self.test_doc.findAll(name='meta',
                                           attrs={
                                               'name': 'flags',
                                               'content': True
                                           }):
            css_flags = set(match['content'].split())
            if bool(
                    css_flags & {
                        "animated", "font", "history", "interact", "paged",
                        "speech", "userstyle"
                    }):
                return True

        return False

    def is_slow_test(self):
        return any([
            match.name == 'meta' and match['name'] == 'timeout'
            for match in self.test_doc.findAll(content='long')
        ])

    def potential_ref_filename(self):
        parts = self.filesystem.splitext(self.filename)
        return parts[0] + '-ref' + parts[1]

    def is_wpt_reftest(self):
        """Returns whether the test is a reftest according to WPT rules (i.e. the file has a -ref counterpart such as foo-ref.html)."""
        return self.filesystem.isfile(self.potential_ref_filename())

    def support_files(self, doc):
        """ Searches the file for all paths specified in url()'s, href or src attributes."""
        support_files = []

        if doc is None:
            return support_files

        elements_with_src_attributes = doc.findAll(src=re.compile('.*'))
        elements_with_href_attributes = doc.findAll(href=re.compile('.*'))

        url_pattern = re.compile('url\(.*\)')
        urls = []
        for url in doc.findAll(text=url_pattern):
            for url in re.findall(url_pattern, url):
                url = re.sub('url\([\'\"]?', '', url)
                url = re.sub('[\'\"]?\)', '', url)
                urls.append(url)

        src_paths = [
            src_tag['src'] for src_tag in elements_with_src_attributes
        ]
        href_paths = [
            href_tag['href'] for href_tag in elements_with_href_attributes
        ]

        paths = src_paths + href_paths + urls
        for path in paths:
            uri_scheme_pattern = re.compile(r"[A-Za-z][A-Za-z+.-]*:")
            if not uri_scheme_pattern.match(path):
                support_files.append(path)

        return support_files
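
A hedged sketch of the <meta name="flags"> and timeout checks above, run directly on made-up markup with BeautifulSoup standing in for the Parser wrapper (the import is an assumption):

from BeautifulSoup import BeautifulSoup  # legacy BeautifulSoup 3 import (assumption)

sample_test = """
<html><head>
<meta name="flags" content="interact paged">
<meta name="timeout" content="long">
</head><body></body></html>
"""

doc = BeautifulSoup(sample_test)

# CSS WG manual-test flags, as in is_wpt_manualtest above.
manual_flags = {"animated", "font", "history", "interact", "paged", "speech", "userstyle"}
is_manual = any(set(meta["content"].split()) & manual_flags
                for meta in doc.findAll(name="meta", attrs={"name": "flags", "content": True}))
print(is_manual)  # True

# Slow-test marker, as in is_slow_test above.
is_slow = any(match.name == "meta" and match["name"] == "timeout"
              for match in doc.findAll(content="long"))
print(is_slow)  # True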
Exemplo n.º 54
0
 def _parse_quips(self, page):
     soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
     quips = soup.find(
         text=re.compile(r"Existing quips:")).findNext("ul").findAll("li")
     return [unicode(quip_entry.string) for quip_entry in quips]
Exemplo n.º 55
0
 def builder_statuses(self):
     soup = BeautifulSoup(self._fetch_one_box_per_builder())
     return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]
Exemplo n.º 56
0
    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info
        if test_contents is not None:
            self.test_doc = Parser(test_contents)
        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest
        matches = self.reference_links_of_type(
            'match') + self.reference_links_of_type('mismatch')
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    'Multiple references are not supported. Importing the first ref defined in %s',
                    self.filesystem.basename(self.filename))

            try:
                href_match_file = matches[0]['href'].strip()
                if href_match_file.startswith('/'):
                    ref_file = self.filesystem.join(
                        self.source_root_directory,
                        href_match_file.lstrip('/'))
                else:
                    ref_file = self.filesystem.join(
                        self.filesystem.dirname(self.filename),
                        href_match_file)
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"',
                           self.filesystem)
                return None

            if (ref_file == self.filename):
                return {'referencefile': self.filename}

            if self.ref_doc is None:
                self.load_file(ref_file, True)

            test_info = {'test': self.filename, 'reference': ref_file}

            # If the ref file does not live in the same directory as the test file, check it for support files
            test_info['reference_support_info'] = {}
            if self.filesystem.dirname(ref_file) != self.filesystem.dirname(
                    self.filename):
                reference_support_files = self.support_files(self.ref_doc)
                if len(reference_support_files) > 0:
                    reference_relpath = self.filesystem.relpath(
                        self.filesystem.dirname(self.filename),
                        self.filesystem.dirname(
                            ref_file)) + self.filesystem.sep
                    test_info['reference_support_info'] = {
                        'reference_relpath': reference_relpath,
                        'files': reference_support_files
                    }

        # not all reference tests have a <link rel='match'> element in WPT repo
        elif self.is_wpt_reftest():
            test_info = {
                'test': self.filename,
                'reference': self.potential_ref_filename()
            }
            test_info['reference_support_info'] = {}
        # We check for WPT manual tests before jstests, as some WPT manual tests could otherwise be classified as CSS JS tests.
        elif self.is_wpt_manualtest():
            test_info = {'test': self.filename, 'manualtest': True}
        elif self.is_jstest():
            test_info = {'test': self.filename, 'jstest': True}
        elif '-ref' in self.filename or 'reference' in self.filename:
            test_info = {'referencefile': self.filename}
        elif self.options['all'] is True:
            test_info = {'test': self.filename}

        if test_info and self.is_slow_test():
            test_info['slow'] = True

        return test_info
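
A small illustration of the href resolution above, with os.path standing in for the filesystem object; source_root_directory, the test path, and resolve_ref_href are hypothetical names for this sketch:

import os

source_root_directory = "/checkout/wpt"
test_filename = "/checkout/wpt/css/css-flexbox/flex-001.html"

def resolve_ref_href(href):
    # Absolute WPT hrefs ("/...") resolve against the checkout root;
    # everything else resolves against the test's own directory.
    href = href.strip()
    if href.startswith("/"):
        return os.path.join(source_root_directory, href.lstrip("/"))
    return os.path.join(os.path.dirname(test_filename), href)

print(resolve_ref_href("/css/reference/filled-green-100px-square.html"))
# /checkout/wpt/css/reference/filled-green-100px-square.html
print(resolve_ref_href("flex-001-ref.html"))
# /checkout/wpt/css/css-flexbox/flex-001-ref.html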
Exemplo n.º 57
0
 def _group_rows_from_edit_user_page(self, edit_user_page):
     soup = BeautifulSoup(edit_user_page,
                          convertEntities=BeautifulSoup.HTML_ENTITIES)
     return soup('td', {'class': 'groupname'})
Exemplo n.º 58
0
 def login_userid_pairs_from_edit_user_results(self, results_page):
     soup = BeautifulSoup(results_page, convertEntities=BeautifulSoup.HTML_ENTITIES)
     results_table = soup.find(id="admin_table")
     login_userid_pairs = [self._login_and_uid_from_row(row) for row in results_table('tr')]
     # Filter out None from the logins.
     return list(filter(lambda pair: bool(pair), login_userid_pairs))
Exemplo n.º 59
0
    def analyze_test(self, test_contents=None, ref_contents=None):
        """ Analyzes a file to determine if it's a test, what type of test, and what reference or support files it requires. Returns all of the test info """

        test_info = None

        if test_contents is None and self.test_doc is None:
            return test_info

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest

        matches = self.reference_links_of_type("match") + self.reference_links_of_type("mismatch")
        if matches:
            if len(matches) > 1:
                # FIXME: Is this actually true? We should fix this.
                _log.warning(
                    "Multiple references are not supported. Importing the first ref defined in %s",
                    self.filesystem.basename(self.filename),
                )

            try:
                ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), matches[0]["href"])
            except KeyError as e:
                # FIXME: Figure out what to do w/ invalid test files.
                _log.error('%s has a reference link but is missing the "href"', self.filesystem)
                return None

            if self.ref_doc is None:
                self.ref_doc = self.load_file(ref_file)

            test_info = {"test": self.filename, "reference": ref_file}

            # If the ref file path is relative, we need to check it for
            # relative paths also because when it lands in WebKit, it will be
            # moved down into the test dir.
            #
            # Note: The test files themselves are not checked for support files
            # outside their directories as the convention in the CSSWG is to
            # put all support files in the same dir or subdir as the test.
            #
            # All non-test files in the test's directory tree are normally
            # copied as part of the import as they are assumed to be required
            # support files.
            #
            # *But*, there is exactly one case in the entire css2.1 suite where
            # a test depends on a file that lives in a different directory,
            # which depends on another file that lives outside of its
            # directory. This code covers that case :)
            if matches[0]["href"].startswith(".."):
                support_files = self.support_files(self.ref_doc)
                test_info["refsupport"] = support_files

        elif self.is_jstest():
            test_info = {"test": self.filename, "jstest": True}
        elif self.options["all"] is True and not ("-ref" in self.filename) and not ("reference" in self.filename):
            test_info = {"test": self.filename}

        return test_info
Exemplo n.º 60
0
 def _parse_bug_page(self, page):
     soup = BeautifulSoup(page)
     bug = {}
     bug["id"] = int(soup.find("bug_id").string)
     bug["title"] = self._string_contents(soup.find("short_desc"))
     bug["reporter_email"] = self._string_contents(soup.find("reporter"))
     bug["assigned_to_email"] = self._string_contents(soup.find("assigned_to"))
     bug["cc_emails"] = [self._string_contents(element)
                         for element in soup.findAll('cc')]
     bug["attachments"] = [self._parse_attachment_element(element, bug["id"]) for element in soup.findAll('attachment')]
     bug["platform"] = self._string_contents(soup.find("rep_platform"))
     bug["os"] = self._string_contents(soup.find("op_sys"))
     bug["long_description"] = self._string_contents(soup.find("long_desc").findNext("thetext"))
     bug["keywords"] = self._string_contents(soup.find("keywords"))
     bug["component"] = self._string_contents(soup.find("component"))
     return bug
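
A hedged sketch of the bug-page parsing above against a trimmed, made-up Bugzilla XML fragment; string_contents is a stand-in for the _string_contents helper, and the import assumes the legacy BeautifulSoup 3 package:

from BeautifulSoup import BeautifulSoup  # legacy BeautifulSoup 3 import (assumption)

sample_bug_xml = """
<bug>
  <bug_id>12345</bug_id>
  <short_desc>Example bug title</short_desc>
  <reporter>reporter@example.com</reporter>
  <cc>cc1@example.com</cc>
  <cc>cc2@example.com</cc>
</bug>
"""

def string_contents(element):
    # Stand-in for _string_contents above: unwrap the tag's text, if any.
    return element.string if element else None

soup = BeautifulSoup(sample_bug_xml)
bug = {
    "id": int(soup.find("bug_id").string),
    "title": string_contents(soup.find("short_desc")),
    "reporter_email": string_contents(soup.find("reporter")),
    "cc_emails": [string_contents(element) for element in soup.findAll("cc")],
}
print(bug["id"])         # 12345
print(bug["cc_emails"])  # [u'cc1@example.com', u'cc2@example.com']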