Exemplo n.º 1
0
def test_html2():
    """Check the diagnostics reported for the ``miscerrors.html`` fixture.

    The fixture is loaded with ``SourceFile.load_xhtml`` and then run through
    the CSS, title, document, TOC, anchor and unicode checks of ``KXhtml``.
    The expected values below presumably mirror the content of that fixture.
    """
    from sourcefile import SourceFile
    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/miscerrors.html")
    assert myfile.tree
    x = KXhtml()
    x.check_css(myfile)
    x.check_title(myfile)
    x.check_document(myfile)
    x.epub_toc(myfile)
    x.check_anchors(myfile)
    x.check_unicode(myfile)

    # CSS
    assert x.cssutils_errors == []
    assert x.sel_unchecked == []
    assert len(x.sel_unused) == 2
    assert '.large' in x.sel_unused
    assert '.pagenum' in x.sel_unused
    assert x.classes_undefined == [[17, 'asdfgh']]

    # Title
    assert x.good_format == False
    assert x.title == 'no title — no author'
    # None is a singleton: compare by identity, not equality (PEP 8).
    assert x.author is None

    # Encoding
    assert myfile.encoding == 'utf-8'
    assert x.meta_encoding == myfile.encoding
    assert len(x.encoding_errors) == 0

    # TOC - only the number of entries is checked here
    assert len(x.toc) == 2

    # Languages
    assert x.document_lang == "fr"
    assert x.document_xmllang == "en"

    # h1
    assert x.num_h1 == 2

    # sup stars
    assert len(x.stars_in_sup) == 2

    # Inline style
    assert len(x.inline_style) == 2
    assert x.inline_style[0][1] == 'div'
    assert x.inline_style[0][2] == 'text-indent:2em'
    assert x.inline_style[1][1] == 'span'
    assert x.inline_style[1][2] == 'margin-left: 1em;'

    # Something after <sup> tag
    assert len(x.text_after_sup) == 2
    assert x.text_after_sup == [37, 38]

    # Empty lines at the end
    assert myfile.ending_empty_lines == 5
Exemplo n.º 2
0
def test_html3():
    """Run every check on ``noerror.html`` and expect a clean report."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/noerror.html")
    assert source.tree

    checker = KXhtml()
    checker.check_css(source)
    checker.check_title(source)
    checker.check_document(source)
    checker.epub_toc(source)
    checker.check_anchors(source)
    checker.check_unicode(source)

    # Stylesheet findings
    assert checker.cssutils_errors == []
    assert checker.sel_unchecked == []
    assert len(checker.sel_unused) == 0
    assert len(checker.classes_undefined) == 0

    # Title and author
    assert checker.good_format == True
    assert checker.title == 'Voyage à Cayenne, Vol. 1'
    assert checker.author == 'L. A. Pitou'

    # Declared encoding agrees with the one recorded on the source file
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == source.encoding
    assert len(checker.encoding_errors) == 0

    # Table of contents: overall size, then a sample of entries given as
    # (position, field 0, field 1) -- presumably heading level and text.
    assert len(checker.toc) == 10
    sampled_entries = (
        (0, 0, 'one header'),
        (3, 3, 'lvl 4-1'),
        (8, 2, 'other'),
        (9, 3, 'lvl 4-3 on 2 lines'),
    )
    for position, first_field, second_field in sampled_entries:
        assert checker.toc[position][0] == first_field
        assert checker.toc[position][1] == second_field

    # lang / xml:lang
    assert checker.document_lang == "fr"
    assert checker.document_xmllang == "fr"

    # Number of h1 elements
    assert checker.num_h1 == 1

    # Stars inside <sup>
    assert len(checker.stars_in_sup) == 0

    # Inline style attributes
    assert len(checker.inline_style) == 0

    # Text following a <sup> tag
    assert len(checker.text_after_sup) == 0

    # Trailing empty lines
    assert source.ending_empty_lines == 1
Exemplo n.º 3
0
def test_html3():
    """Run every check on ``noerror.html`` and expect a clean report."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/noerror.html")
    assert source.tree

    checker = KXhtml()
    checker.check_css(source)
    checker.check_title(source)
    checker.check_document(source)
    checker.epub_toc(source)
    checker.check_anchors(source)
    checker.check_unicode(source)

    # Stylesheet findings
    assert checker.cssutils_errors == []
    assert checker.sel_unchecked == []
    assert len(checker.sel_unused) == 0
    assert len(checker.classes_undefined) == 0

    # Title and author
    assert checker.good_format == True
    assert checker.title == 'Voyage à Cayenne, Vol. 1'
    assert checker.author == 'L. A. Pitou'

    # Declared encoding agrees with the one recorded on the source file
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == source.encoding
    assert len(checker.encoding_errors) == 0

    # Table of contents: overall size, then a sample of entries given as
    # (position, field 0, field 1) -- presumably heading level and text.
    assert len(checker.toc) == 10
    sampled_entries = (
        (0, 0, 'one header'),
        (3, 3, 'lvl 4-1'),
        (8, 2, 'other'),
        (9, 3, 'lvl 4-3 on 2 lines'),
    )
    for position, first_field, second_field in sampled_entries:
        assert checker.toc[position][0] == first_field
        assert checker.toc[position][1] == second_field

    # lang / xml:lang
    assert checker.document_lang == "fr"
    assert checker.document_xmllang == "fr"

    # Number of h1 elements
    assert checker.num_h1 == 1

    # Stars inside <sup>
    assert len(checker.stars_in_sup) == 0

    # Inline style attributes
    assert len(checker.inline_style) == 0

    # Text following a <sup> tag
    assert len(checker.text_after_sup) == 0

    # Trailing empty lines
    assert source.ending_empty_lines == 1
Exemplo n.º 4
0
def test_html2():
    """Check the diagnostics reported for the ``miscerrors.html`` fixture.

    The fixture is loaded with ``SourceFile.load_xhtml`` and then run through
    the CSS, title, document, TOC, anchor and unicode checks of ``KXhtml``.
    The expected values below presumably mirror the content of that fixture.
    """
    from sourcefile import SourceFile
    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/miscerrors.html")
    assert myfile.tree
    x = KXhtml()
    x.check_css(myfile)
    x.check_title(myfile)
    x.check_document(myfile)
    x.epub_toc(myfile)
    x.check_anchors(myfile)
    x.check_unicode(myfile)

    # CSS
    assert x.cssutils_errors == []
    assert x.sel_unchecked == []
    assert len(x.sel_unused) == 2
    assert '.large' in x.sel_unused
    assert '.pagenum' in x.sel_unused
    assert x.classes_undefined == [[17, 'asdfgh']]

    # Title
    assert x.good_format == False
    assert x.title == 'no title — no author'
    # None is a singleton: compare by identity, not equality (PEP 8).
    assert x.author is None

    # Encoding
    assert myfile.encoding == 'utf-8'
    assert x.meta_encoding == myfile.encoding
    assert len(x.encoding_errors) == 0

    # TOC - only the number of entries is checked here
    assert len(x.toc) == 2

    # Languages
    assert x.document_lang == "fr"
    assert x.document_xmllang == "en"

    # h1
    assert x.num_h1 == 2

    # sup stars
    assert len(x.stars_in_sup) == 2

    # Inline style
    assert len(x.inline_style) == 2
    assert x.inline_style[0][1] == 'div'
    assert x.inline_style[0][2] == 'text-indent:2em'
    assert x.inline_style[1][1] == 'span'
    assert x.inline_style[1][2] == 'margin-left: 1em;'

    # Something after <sup> tag
    assert len(x.text_after_sup) == 2
    assert x.text_after_sup == [37, 38]

    # Empty lines at the end
    assert myfile.ending_empty_lines == 5
Exemplo n.º 5
0
def test_html2():
    """Check the encoding findings for the ``nocharset.html`` fixture.

    After ``check_document`` no meta encoding is recorded, the source file
    still reports ``utf-8`` and no encoding error is raised.
    """
    from sourcefile import SourceFile
    myfile = SourceFile()
    myfile.load_xhtml("data/testfiles/nocharset.html")
    assert myfile.tree
    x = KXhtml()
    x.check_document(myfile)
    # None is a singleton: compare by identity, not equality (PEP 8).
    assert x.meta_encoding is None
    assert myfile.encoding == 'utf-8'
    assert len(x.encoding_errors) == 0
Exemplo n.º 6
0
def test_html2():
    """Check the encoding findings for the ``nocharset.html`` fixture.

    After ``check_document`` no meta encoding is recorded, the source file
    still reports ``utf-8`` and no encoding error is raised.
    """
    from sourcefile import SourceFile
    myfile = SourceFile()
    myfile.load_xhtml("data/testfiles/nocharset.html")
    assert myfile.tree
    x = KXhtml()
    x.check_document(myfile)
    # None is a singleton: compare by identity, not equality (PEP 8).
    assert x.meta_encoding is None
    assert myfile.encoding == 'utf-8'
    assert len(x.encoding_errors) == 0
Exemplo n.º 7
0
def test_html1():
    """Check the encoding findings for the ``badcharset.html`` fixture."""
    from sourcefile import SourceFile

    source = SourceFile()
    source.load_xhtml("data/testfiles/badcharset.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # The meta encoding and the source file's encoding disagree here, and
    # exactly one encoding error is expected.
    assert checker.meta_encoding == 'iso-8859-1'
    assert source.encoding == 'utf-8'
    assert len(checker.encoding_errors) == 1

    # Trailing empty lines
    assert source.ending_empty_lines == 1
Exemplo n.º 8
0
def test_html1():
    """Check the encoding findings for the ``badcharset.html`` fixture."""
    from sourcefile import SourceFile

    source = SourceFile()
    source.load_xhtml("data/testfiles/badcharset.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # The meta encoding and the source file's encoding disagree here, and
    # exactly one encoding error is expected.
    assert checker.meta_encoding == 'iso-8859-1'
    assert source.encoding == 'utf-8'
    assert len(checker.encoding_errors) == 1

    # Trailing empty lines
    assert source.ending_empty_lines == 1
Exemplo n.º 9
0
def main(url):
    """Scan every file below ``url`` for profanity and print a report.

    The tree rooted at ``url`` is walked with ``os.walk``; each file is
    wrapped in a ``SourceFile`` and parsed. Per-file findings are printed as
    they are found, followed by overall statistics.

    Args:
        url: Root directory handed to ``os.walk``.

    Returns:
        None. All results are written to stdout.
    """
    from collections import Counter

    files = []
    for root, _directories, filenames in os.walk(url):
        for filename in filenames:
            source = SourceFile(os.path.join(root, filename))
            files.append(source)
            try:
                print("Parsing " + source.fullpath)
                source.parse()
                if len(source.profanewords) > 0:
                    for index, word in enumerate(source.profanewords):
                        print("Line " + str(source.profanelines[index] + 1) +
                              ": " + word)
                    print("Found " + str(len(source.profanewords)) +
                          " words for a score of " +
                          str(source.profanityscore))
                    print()
            except Exception as ex:
                # Best effort: one unparsable file must not abort the scan.
                print("Failed to parse file: ", ex)

    # Calculate and display statistics.
    # default=None keeps a tree without any file from raising ValueError;
    # the value is only used below when the total score is positive, which
    # cannot happen with no files.
    mostprofanefile = max(files,
                          key=lambda curfile: len(curfile.profanewords),
                          default=None)

    favorites = [
        word
        for word in (source.favoriteprofaneword() for source in files)
        if word is not None
    ]
    if favorites:
        mostcommonprofaneword = Counter(favorites).most_common(1)[0][0]
    else:
        mostcommonprofaneword = "N/A"

    # profanewordcount is indexed 1..4 for the four labels printed below.
    mild, medium, strong, very_strong = (
        sum(source.profanewordcount[level] for source in files)
        for level in (1, 2, 3, 4)
    )

    print()
    print("Total files scanned: " + str(len(files)))
    print("Words found: " + str(mild) + " Mild, " +
          str(medium) + " Medium, " +
          str(strong) + " Strong, " +
          str(very_strong) + " Very Strong")
    totalprofanityscore = sum(source.profanityscore for source in files)
    if totalprofanityscore > 0:
        print("Most profane file: " + str(mostprofanefile.fullpath) +
              " with " + str(len(mostprofanefile.profanewords)) +
              " words for a score of " + str(mostprofanefile.profanityscore))
        print("Most common word: " + mostcommonprofaneword)
        print("Total score: " + str(totalprofanityscore))
Exemplo n.º 10
0
def test_encoding1():
    """No encoding is declared in ``noencoding.html``.

    The source file still reports ``utf-8`` and ``check_document`` leaves
    ``meta_encoding`` unset.
    """
    from sourcefile import SourceFile
    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/noencoding.html")
    assert myfile.tree
    x = KXhtml()
    x.check_document(myfile)

    # Encoding
    assert myfile.encoding == 'utf-8'
    # None is a singleton: compare by identity, not equality (PEP 8).
    assert x.meta_encoding is None
Exemplo n.º 11
0
    def prepare_bc(self):
        bc_dir = self.get_bc_dir_of_project()
        bitcode_files = get_files_in_dir(
            bc_dir, ext='.bc', search_spaces=self.arguments.search_spaces)

        if len(bitcode_files) == 0:
            print 'There is no bitcode in:', bc_dir
            exit(0)
        print 'Total number of c files : {}'.format(len(bitcode_files))

        for bitcode_file in bitcode_files:
            sf = SourceFile('', arguments=self.arguments)
            sf.emit_llvm_ll_and_functions(bitcode_file)
Exemplo n.º 12
0
def test_encoding1():
    """No encoding is declared in ``noencoding.html``.

    The source file still reports ``utf-8`` and ``check_document`` leaves
    ``meta_encoding`` unset.
    """
    from sourcefile import SourceFile
    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/noencoding.html")
    assert myfile.tree
    x = KXhtml()
    x.check_document(myfile)

    # Encoding
    assert myfile.encoding == 'utf-8'
    # None is a singleton: compare by identity, not equality (PEP 8).
    assert x.meta_encoding is None
Exemplo n.º 13
0
    def update(self,
               tests_root,
               url_base,
               new_rev,
               committed_changes=None,
               local_changes=None,
               remove_missing_local=False):
        """Bring the manifest in line with committed and local changes.

        Args:
            tests_root: Passed as the first argument to ``SourceFile``.
            url_base: Passed to ``SourceFile`` and stored on ``self.url_base``.
            new_rev: Stored on ``self.rev`` unless it is ``None``.
            committed_changes: Optional iterable of ``(rel_path, status)``
                pairs. Each path is removed from the manifest; paths whose
                status is ``"modified"`` are re-added from a ``SourceFile``.
            local_changes: Optional mapping of ``rel_path`` to status;
                treated as empty when ``None``.
            remove_missing_local: When true, every committed path that is
                absent from ``local_changes`` is recorded as deleted.
        """
        if local_changes is None:
            local_changes = {}

        if committed_changes is not None:
            for rel_path, status in committed_changes:
                self.remove_path(rel_path)
                if status == "modified":
                    # A path that also has a local change is read in its
                    # committed form here; the local one is handled below.
                    use_committed = rel_path in local_changes
                    source_file = SourceFile(tests_root,
                                             rel_path,
                                             url_base,
                                             use_committed=use_committed)
                    self.extend(source_file.manifest_items())

        # Local changes are rebuilt from scratch on every update.
        self.local_changes = LocalChanges(self)

        local_paths = set()
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        for rel_path, status in local_changes.items():
            local_paths.add(rel_path)

            if status == "modified":
                existing_items = self._committed_with_path(rel_path)
                source_file = SourceFile(tests_root,
                                         rel_path,
                                         url_base,
                                         use_committed=False)
                local_items = set(source_file.manifest_items())

                # Only items not already among the committed ones are kept.
                updated_items = local_items - existing_items
                self.local_changes.extend(updated_items)
            else:
                # Any status other than "modified" is recorded as a deletion.
                self.local_changes.add_deleted(rel_path)

        if remove_missing_local:
            for path in self._committed_paths() - local_paths:
                self.local_changes.add_deleted(path)

        self.update_reftests()

        if new_rev is not None:
            self.rev = new_rev
        self.url_base = url_base
Exemplo n.º 14
0
def test_encoding2():
    """A valid us-ascii declaration, read as utf-8, gives no encoding error."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/asciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding of the source file vs. the one recorded from the document
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 0
Exemplo n.º 15
0
def test_encoding4():
    """An invalid encoding declaration produces two encoding errors."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/inv-encoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding of the source file vs. the one recorded from the document
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'ascii'
    # Two errors: the encoding is invalid and it differs from the file's.
    assert len(checker.encoding_errors) == 2
Exemplo n.º 16
0
def test_encoding3():
    """Declared as ascii but containing unicode: one encoding error."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/notasciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding of the source file vs. the one recorded from the document
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 1
Exemplo n.º 17
0
def test_encoding2():
    """A valid us-ascii declaration, read as utf-8, gives no encoding error."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/asciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding of the source file vs. the one recorded from the document
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 0
Exemplo n.º 18
0
def test_encoding3():
    """Declared as ascii but containing unicode: one encoding error."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/notasciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding of the source file vs. the one recorded from the document
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 1
Exemplo n.º 19
0
def test_encoding4():
    """An invalid encoding declaration produces two encoding errors."""
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/inv-encoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding of the source file vs. the one recorded from the document
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'ascii'
    # Two errors: the encoding is invalid and it differs from the file's.
    assert len(checker.encoding_errors) == 2
Exemplo n.º 20
0
def process_sample(source_file, args, project_dir, compile_arguments, type=''):
    """Run the analysis selected by ``type`` on a single sample.

    Args:
        source_file: Passed as ``source_file=`` to ``SourceFile`` for the
            ``'ast'``/``'bc'`` analyses and as ``file_info=`` to
            ``BitCodeFile`` for ``'pdg'``/``'as'``.
        args: Forwarded as ``arguments=`` to whichever handler is built.
        project_dir: Forwarded to ``SourceFile`` (``'ast'``/``'bc'`` only).
        compile_arguments: Forwarded to ``SourceFile`` (``'ast'``/``'bc'``
            only).
        type: Analysis kind. Any value other than ``'ast'``, ``'bc'``,
            ``'pdg'`` or ``'as'`` does nothing.

    Returns:
        ``int(success)`` of ``BitCodeFile.analyze()`` for ``'pdg'``/``'as'``;
        ``1`` in every other case, including a failed ``SourceFile`` analysis.
    """
    if type in ('ast', 'bc'):
        source_file_obj = SourceFile(source_file=source_file,
                                     arguments=args,
                                     project_dir=project_dir,
                                     compile_arguments=compile_arguments,
                                     analysis_type=type)
        try:
            source_file_obj.analyze()
        except Exception:
            # Deliberate best effort: a crashing sample is skipped silently.
            # Catching Exception rather than everything lets
            # KeyboardInterrupt and SystemExit still stop the run.
            pass
    if type in ('pdg', 'as'):
        file_handler = BitCodeFile(file_info=source_file,
                                   arguments=args,
                                   analysis_type=type)
        success = file_handler.analyze()
        return int(success)
    return 1
Exemplo n.º 21
0
    def update(self,
               tests_root,
               url_base,
               new_rev,
               committed_changes=None,
               local_changes=None,
               remove_missing_local=False):
        """Bring the manifest in line with committed and local changes.

        Args:
            tests_root: Passed as the first argument to ``SourceFile``.
            url_base: Passed to ``SourceFile`` and stored on ``self.url_base``.
            new_rev: Stored on ``self.rev`` unless it is ``None``.
            committed_changes: Optional iterable of ``(rel_path, status)``
                pairs. Each path is removed from the manifest; paths whose
                status is ``"modified"`` are re-added from a ``SourceFile``.
            local_changes: Optional mapping of ``rel_path`` to status;
                treated as empty when ``None``.
            remove_missing_local: When true, every committed path that is
                absent from ``local_changes`` is recorded as deleted.
        """
        if local_changes is None:
            local_changes = {}

        if committed_changes is not None:
            for rel_path, status in committed_changes:
                self.remove_path(rel_path)
                if status == "modified":
                    # A path that also has a local change is read in its
                    # committed form here; the local one is handled below.
                    use_committed = rel_path in local_changes
                    source_file = SourceFile(tests_root,
                                             rel_path,
                                             url_base,
                                             use_committed=use_committed)
                    self.extend(source_file.manifest_items())

        # Local changes are rebuilt from scratch on every update.
        self.local_changes = LocalChanges(self)

        local_paths = set()
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        for rel_path, status in local_changes.items():
            local_paths.add(rel_path)

            if status == "modified":
                existing_items = self._committed_with_path(rel_path)
                source_file = SourceFile(tests_root,
                                         rel_path,
                                         url_base,
                                         use_committed=False)
                local_items = set(source_file.manifest_items())

                # Only items not already among the committed ones are kept.
                updated_items = local_items - existing_items
                self.local_changes.extend(updated_items)
            else:
                # Any status other than "modified" is recorded as a deletion.
                self.local_changes.add_deleted(rel_path)

        if remove_missing_local:
            for path in self._committed_paths() - local_paths:
                self.local_changes.add_deleted(path)

        self.update_reftests()

        if new_rev is not None:
            self.rev = new_rev
        self.url_base = url_base
Exemplo n.º 22
0
def check_parsed(path, f):
    """Check the testharness script references of one parsed file.

    Args:
        path: Path of the file, relative to ``repo_root``; also used in the
            error messages.
        f: Unused here; presumably kept so all checks share one signature.

    Returns:
        A list of ``(error_type, message, None)`` tuples. The list is empty
        when nothing is wrong and holds a single ``PARSE-FAILED`` entry when
        the file could not be parsed.
    """
    source_file = SourceFile(repo_root, path, "/")

    if source_file.root is None:
        return [("PARSE-FAILED", "Unable to parse file %s" % path, None)]

    errors = []

    if source_file.testharness_nodes:
        if len(source_file.testharness_nodes) > 1:
            errors.append(("MULTIPLE-TESTHARNESS",
                           "%s more than one <script src='/resources/testharness.js'>" % path, None))
        if not source_file.testharnessreport_nodes:
            errors.append(("MISSING-TESTHARNESSREPORT",
                           "%s missing <script src='/resources/testharnessreport.js'>" % path, None))

    if source_file.testharnessreport_nodes:
        if len(source_file.testharnessreport_nodes) > 1:
            errors.append(("MULTIPLE-TESTHARNESSREPORT",
                           "%s more than one <script src='/resources/testharnessreport.js'>" % path, None))
        if not source_file.testharness_nodes:
            # The script is named in lowercase, matching the
            # MULTIPLE-TESTHARNESS message above.
            errors.append(("MISSING-TESTHARNESS",
                           "%s missing <script src='/resources/testharness.js'>" % path, None))

    return errors
Exemplo n.º 23
0
    def make_new():
        """Build a ``SourceFile`` for ``path`` using the manifest's URL base.

        ``tests_root``, ``path`` and ``manifest`` are resolved from the
        surrounding scope.
        """
        from sourcefile import SourceFile

        fresh_source = SourceFile(tests_root, path, manifest.url_base)
        return fresh_source