Пример #1
0
def generate_tests():
    """Create one 'test_...' method per extractor test case

    Each generated method is attached to TestExtractors and named
    'test_<ExtractorClassName>_<N>', with N counting from 1.

    When run as a script with arguments, 'all' selects every extractor;
    otherwise only extractors whose category (or basecategory, if they
    have one) appears in sys.argv are selected.  The arguments are then
    removed from sys.argv.  In every other case, extractors whose
    category is listed in SKIP are left out.
    """
    def _generate_test(extr, tcase):
        def test(self):
            url, result = tcase
            print("\n", url, sep="")
            self._run_test(extr, url, result)

        return test

    def _requested(extr):
        # explicit parentheses: 'and' binds tighter than 'or'
        return extr.category in sys.argv or (
            hasattr(extr, "basecategory") and extr.basecategory in sys.argv)

    direct_call = __name__ == '__main__' and len(sys.argv) > 1
    if not direct_call:
        extractors = [
            extr for extr in extractor.extractors()
            if extr.category not in SKIP
        ]
    else:
        if sys.argv[1].lower() == "all":
            extractors = extractor.extractors()
        else:
            extractors = [
                extr for extr in extractor.extractors() if _requested(extr)
            ]
        # drop the selection arguments
        del sys.argv[1:]

    for extr in extractors:
        # only extractors with a non-empty 'test' attribute get methods
        if hasattr(extr, "test") and extr.test:
            prefix = "test_" + extr.__name__ + "_"
            for num, tcase in enumerate(extr.test, 1):
                method = _generate_test(extr, tcase)
                method.__name__ = prefix + str(num)
                setattr(TestExtractors, method.__name__, method)
Пример #2
0
 def test_docstrings(self):
     """Check that no two different extractors share a docstring"""
     for extr1 in extractor.extractors():
         for extr2 in extractor.extractors():
             # only compare distinct extractors that both have a docstring
             comparable = extr1 != extr2 and extr1.__doc__ and extr2.__doc__
             if not comparable:
                 continue
             message = "{} <-> {}".format(extr1, extr2)
             self.assertNotEqual(extr1.__doc__, extr2.__doc__, message)
Пример #3
0
 def test_docstrings(self):
     """Docstrings of distinct extractors must differ from each other"""
     for left in extractor.extractors():
         for right in extractor.extractors():
             if left != right and left.__doc__ and right.__doc__:
                 # the failure message names the two offending extractors
                 pair = "{} <-> {}".format(left, right)
                 self.assertNotEqual(left.__doc__, right.__doc__, pair)
Пример #4
0
    def test_unique_pattern_matches(self):
        """Check that every testcase URL is matched by exactly one pattern"""
        # pair each testcase URL with the extractor that declares it
        test_urls = [
            (testcase[0], extr)
            for extr in extractor.extractors()
            for testcase in extr._get_tests()
        ]

        for url, extr1 in test_urls:
            # try the pattern of every entry in extractor._cache on this URL
            matches = []
            for extr2 in extractor._cache:
                # the DirectlinkExtractor pattern only counts for its own URLs
                if extr2 == DLExtractor and extr1 != DLExtractor:
                    continue
                match = extr2.pattern.match(url)
                if match:
                    matches.append(match)

            # anything other than exactly one match is a failure
            if len(matches) > 1:
                msg = "'{}' gets matched by more than one pattern:".format(url)
                msg += "".join("\n- " + match.re.pattern for match in matches)
                self.fail(msg)

            if not matches:
                self.fail("'{}' isn't matched by any pattern".format(url))
Пример #5
0
    def test_unique_pattern_matches(self):
        """Each testcase URL has to match one, and only one, pattern"""
        # gather (URL, declaring extractor) pairs from all testcases
        test_urls = []
        for extr in extractor.extractors():
            test_urls.extend(
                (testcase[0], extr) for testcase in extr._get_tests())

        for url, extr1 in test_urls:
            matches = []

            for extr2 in extractor._cache:
                # ignore the DirectlinkExtractor pattern unless the URL
                # belongs to DirectlinkExtractor itself
                if extr1 != DLExtractor and extr2 == DLExtractor:
                    continue

                found = extr2.pattern.match(url)
                if found:
                    matches.append(found)

            count = len(matches)

            if count > 1:
                # list every pattern that matched in the failure message
                msg = "'{}' gets matched by more than one pattern:".format(url)
                for found in matches:
                    msg = msg + "\n- " + found.re.pattern
                self.fail(msg)

            if count < 1:
                self.fail("'{}' isn't matched by any pattern".format(url))
Пример #6
0
def main():
    """Run a ResultJob for each requested URL and print its test data

    Command line:
        --content   passed on to ResultJob as 'content'
        --recreate  treat the positional arguments as category names and
                    use the test URLs of all extractors in those categories
        urls        URLs (or category names when --recreate is given)

    For every URL the extractor class name is printed, followed by a line
    built from TESTDATA_FMT (on success) or TESTDATA_EXCEPTION_FMT (when
    running the job raised an exception).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if args.recreate:
        urls = [
            test[0]
            for extr in extractor.extractors() if extr.category in args.urls
            # an extractor may lack 'test' or have a falsy value for it;
            # treat both as "no test cases" instead of raising
            for test in (getattr(extr, "test", None) or ())
        ]
    else:
        urls = args.urls

    setup_test_config()

    for url in urls:
        tjob = ResultJob(url, content=args.content)
        try:
            tjob.run()
        except Exception as exc:
            # report the exception type in place of the hash values
            fmt = TESTDATA_EXCEPTION_FMT
            data = (exc.__class__.__name__,)
        else:
            fmt = TESTDATA_FMT
            data = (tjob.hash_url.hexdigest(),
                    tjob.hash_keyword.hexdigest(),
                    tjob.hash_content.hexdigest())
        print(tjob.extractor.__class__.__name__)
        print(fmt.format(url, *data))
Пример #7
0
    def test_names(self):
        """Extractor class names must follow CategorySubcategoryExtractor"""
        def capitalize(c):
            if "-" in c:
                # hyphenated names: capitalize each part, then join them
                words = string.capwords(c.replace("-", " "))
                return words.replace(" ", "")
            # otherwise drop any dots before capitalizing
            return c.replace(".", "").capitalize()

        # categories whose class names use a different spelling;
        # a value of None excludes the category from the check
        mapping = {
            "2chan": "futaba",
            "3dbooru": "threedeebooru",
            "4chan": "fourchan",
            "4plebs": "fourplebs",
            "8chan": "infinitychan",
            "oauth": None,
        }

        for extr in extractor.extractors():
            category = mapping.get(extr.category, extr.category)
            if not category:
                continue
            expected = "{}{}Extractor".format(
                capitalize(category), capitalize(extr.subcategory))
            # names starting with a digit get a leading underscore
            if expected[0].isdigit():
                expected = "_" + expected
            self.assertEqual(expected, extr.__name__)
Пример #8
0
def main():
    """Run a TestJob for each requested URL and print its test data

    Command line:
        --content   passed on to job.TestJob as 'content'
        --recreate  treat the positional arguments as category names and
                    use the test URLs of all extractors in those categories
        urls        URLs (or category names when --recreate is given)

    For every URL the extractor class name is printed, followed by a line
    built from TESTDATA_FMT (on success) or TESTDATA_EXCEPTION_FMT (when
    running the job raised an exception).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if args.recreate:
        urls = [
            test[0]
            for extr in extractor.extractors() if extr.category in args.urls
            # an extractor may lack 'test' or have a falsy value for it;
            # treat both as "no test cases" instead of raising
            for test in (getattr(extr, "test", None) or ())
        ]
    else:
        urls = args.urls

    config.load()
    for url in urls:
        tjob = job.TestJob(url, content=args.content)
        try:
            tjob.run()
        except Exception as exc:
            # report the exception type in place of the hash values
            fmt = TESTDATA_EXCEPTION_FMT
            data = (exc.__class__.__name__,)
        else:
            fmt = TESTDATA_FMT
            data = (tjob.hash_url.hexdigest(),
                    tjob.hash_keyword.hexdigest(),
                    tjob.hash_content.hexdigest())
        print(tjob.extractor.__class__.__name__)
        print(fmt.format(url, *data))
Пример #9
0
def generate_tests():
    """Create one 'test_...' method per extractor test case

    Each generated method is attached to TestExtractorResults and named
    'test_<ExtractorClassName>_<N>', with N counting from 1.

    When run as a script with arguments, the first argument 'all' selects
    everything, 'broken' selects the categories in BROKEN, and anything
    else is taken as a list of categories.  The arguments are then removed
    from sys.argv.  In every other case the categories in BROKEN are
    excluded.
    """
    def _generate_test(extr, tcase):
        def test(self):
            url, result = tcase
            print("\n", url, sep="")
            self._run_test(extr, url, result)
        return test

    direct_call = __name__ == '__main__' and len(sys.argv) > 1
    if direct_call:
        categories = sys.argv[1:]
        selector = categories[0].lower()
        # 'all' is expressed as "exclude nothing"
        negate = selector == "all"
        if negate:
            categories = ()
        elif selector == "broken":
            categories = BROKEN
        del sys.argv[1:]
    else:
        categories, negate = BROKEN, True
        if categories:
            print("skipping:", ", ".join(categories))
    fltr = util.build_extractor_filter(categories, negate=negate)

    # attach the generated methods to the test class
    for extr in filter(fltr, extractor.extractors()):
        prefix = "test_" + extr.__name__ + "_"
        for num, tcase in enumerate(extr._get_tests(), 1):
            method = _generate_test(extr, tcase)
            method.__name__ = prefix + str(num)
            setattr(TestExtractorResults, method.__name__, method)
Пример #10
0
    def test_names(self):
        """Class names are expected to read CategorySubcategoryExtractor"""
        def capitalize(c):
            if "-" not in c:
                # no hyphen: remove dots (if any) and capitalize
                return c.replace(".", "").capitalize()
            # hyphenated: capitalize every part and glue them together
            return string.capwords(c.replace("-", " ")).replace(" ", "")

        # category -> spelling used in class names (None: skip category)
        mapping = {
            "2chan"  : "futaba",
            "3dbooru": "threedeebooru",
            "4chan"  : "fourchan",
            "4plebs" : "fourplebs",
            "8chan"  : "infinitychan",
            "oauth"  : None,
        }

        for extr in extractor.extractors():
            category = mapping.get(extr.category, extr.category)
            if category:
                head = capitalize(category)
                tail = capitalize(extr.subcategory)
                expected = "{}{}Extractor".format(head, tail)
                # a leading digit is prefixed with an underscore
                if expected[0].isdigit():
                    expected = "_" + expected
                self.assertEqual(expected, extr.__name__)
Пример #11
0
def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Extractor classes are grouped by category; classes without a category
    or with a category in IGNORE_LIST are left out.  Each group is sorted
    with subcategory_key and the groups are sorted with category_key
    applied to their first element.

    For every class in the 'exhentai' group, a subclass with category
    'e-hentai' and root 'https://e-hentai.org' is added as its own group.
    """
    extractors = collections.defaultdict(list)

    # get lists of extractor classes grouped by category
    for extr in extractor.extractors():
        if not extr.category or extr.category in IGNORE_LIST:
            continue
        extractors[extr.category].append(extr)

    # sort extractor lists with the same category
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # ugly hack to add e-hentai.org
    # .get() avoids the defaultdict inserting an empty 'exhentai' list,
    # which would make 'lst[0]' in the sort key below raise IndexError
    eh = []
    for extr in extractors.get("exhentai", ()):

        class eh_extr(extr):
            category = "e-hentai"
            root = "https://e-hentai.org"

        eh.append(eh_extr)
    if eh:
        # only add the group when it has at least one class
        extractors["e-hentai"] = eh

    # sort lists by category
    return sorted(
        extractors.values(),
        key=lambda lst: category_key(lst[0]),
    )
Пример #12
0
    def test_names(self):
        """Extractor class names must follow CategorySubcategoryExtractor"""
        def capitalize(c):
            if "-" not in c:
                return c.capitalize()
            # hyphenated names: capitalize each part, then join them
            words = string.capwords(c.replace("-", " "))
            return words.replace(" ", "")

        for extr in extractor.extractors():
            # the empty category and 'oauth' are exempt from the check
            if extr.category in ("", "oauth"):
                continue
            expected = "{}{}Extractor".format(
                capitalize(extr.category), capitalize(extr.subcategory))
            # names starting with a digit get a leading underscore
            if expected[0].isdigit():
                expected = "_" + expected
            self.assertEqual(expected, extr.__name__)
Пример #13
0
def generate_tests():
    """Create one 'test_...' method per extractor test case

    Each generated method is attached to TestExtractorResults and named
    'test_<ExtractorClassName>_<N>', with N counting from 1.

    When run as a script with arguments, the first argument 'all' selects
    everything and 'broken' selects the categories in BROKEN; otherwise an
    extractor is selected if its category or basecategory is among the
    arguments.  The arguments are then removed from sys.argv.  In every
    other case the categories in BROKEN are skipped, plus those in
    TRAVIS_SKIP when both 'CI' and 'TRAVIS' are set in the environment.
    """
    def _generate_test(extr, tcase):
        def test(self):
            url, result = tcase
            print("\n", url, sep="")
            self._run_test(extr, url, result)
        return test

    # pick a filter taking (category, basecategory)
    if __name__ == '__main__' and len(sys.argv) > 1:
        selector = sys.argv[1].lower()
        if selector == "all":
            def fltr(c, bc):
                return True
        elif selector == "broken":
            def fltr(c, bc):
                return c in BROKEN
        else:
            argv = sys.argv[1:]

            def fltr(c, bc):
                return c in argv or bc in argv
        del sys.argv[1:]
    else:
        skip = set(BROKEN)
        if "CI" in os.environ and "TRAVIS" in os.environ:
            skip |= set(TRAVIS_SKIP)
        print("skipping:", ", ".join(skip))

        def fltr(c, bc):
            return c not in skip

    # keep the extractor classes accepted by the filter
    extractors = [
        extr for extr in extractor.extractors()
        if fltr(extr.category, getattr(extr, "basecategory", None))
    ]

    # attach the generated methods to the test class
    for extr in extractors:
        if hasattr(extr, "test") and extr.test:
            prefix = "test_" + extr.__name__ + "_"
            for num, tcase in enumerate(extr.test, 1):
                method = _generate_test(extr, tcase)
                method.__name__ = prefix + str(num)
                setattr(TestExtractorResults, method.__name__, method)
Пример #14
0
def build_extractor_list():
    """Return extractor classes grouped by category, as sorted lists

    Classes without a category or with a category in IGNORE_LIST are
    left out.  Each group is sorted with subcategory_key; the groups are
    sorted with category_key applied to their first element.
    """
    by_category = collections.defaultdict(list)

    # group the extractor classes by their category
    for extr in extractor.extractors():
        if extr.category and extr.category not in IGNORE_LIST:
            by_category[extr.category].append(extr)

    groups = list(by_category.values())

    # order the classes inside each group ...
    for group in groups:
        group.sort(key=subcategory_key)

    # ... and then the groups themselves
    groups.sort(key=lambda lst: category_key(lst[0]))
    return groups
Пример #15
0
def build_extractor_list():
    """Build a sorted list of per-category lists of extractor classes

    Classes with an empty category or a category found in IGNORE_LIST are
    skipped.  Within a category, classes are ordered by subcategory_key;
    the categories are ordered by category_key of their first class.
    """
    def _first_class_key(lst):
        return category_key(lst[0])

    extractors = collections.defaultdict(list)

    # collect the classes of each category
    for extr in extractor.extractors():
        category = extr.category
        if not category or category in IGNORE_LIST:
            continue
        extractors[category].append(extr)

    # sort the classes of each category in place
    for extrlist in extractors.values():
        extrlist.sort(key=subcategory_key)

    # sort the categories
    return sorted(extractors.values(), key=_first_class_key)
Пример #16
0
def generate_tests():
    """Create one 'test_...' method per extractor test case

    Each generated method is attached to TestExtractorResults and named
    'test_<ExtractorClassName>_<N>', with N counting from 1.

    When run as a script with arguments, the first argument 'all' selects
    everything and 'broken' selects the categories in BROKEN; otherwise an
    extractor is selected if its category or basecategory is among the
    arguments.  The arguments are then removed from sys.argv.  In every
    other case the categories in BROKEN are skipped, plus those in
    TRAVIS_SKIP when both 'CI' and 'TRAVIS' are set in the environment.
    """
    def _generate_test(extr, tcase):
        def test(self):
            url, result = tcase
            print("\n", url, sep="")
            self._run_test(extr, url, result)
        return test

    # pick a filter taking (category, basecategory)
    if __name__ == '__main__' and len(sys.argv) > 1:
        selector = sys.argv[1].lower()
        if selector == "all":
            def fltr(c, bc):
                return True
        elif selector == "broken":
            def fltr(c, bc):
                return c in BROKEN
        else:
            argv = sys.argv[1:]

            def fltr(c, bc):
                return c in argv or bc in argv
        del sys.argv[1:]
    else:
        skip = set(BROKEN)
        if "CI" in os.environ and "TRAVIS" in os.environ:
            skip |= set(TRAVIS_SKIP)
        # only announce skipped categories when there are any
        if skip:
            print("skipping:", ", ".join(skip))

        def fltr(c, bc):
            return c not in skip

    # keep the extractor classes accepted by the filter
    extractors = [
        extr for extr in extractor.extractors()
        if fltr(extr.category, getattr(extr, "basecategory", None))
    ]

    # attach the generated methods to the test class
    for extr in extractors:
        prefix = "test_" + extr.__name__ + "_"
        for num, tcase in enumerate(extr._get_tests(), 1):
            method = _generate_test(extr, tcase)
            method.__name__ = prefix + str(num)
            setattr(TestExtractorResults, method.__name__, method)
Пример #17
0
    def test_names(self):
        """Extractor class names must follow CategorySubcategoryExtractor"""
        # categories whose class names use a different spelling;
        # a value of None excludes the category from the check
        mapping = {
            "2chan": "futaba",
            "3dbooru": "threedeebooru",
            "4chan": "fourchan",
            "4plebs": "fourplebs",
            "8chan": "infinitychan",
            "b4k": "bfourk",
            "oauth": None,
            "rbt": "rebeccablacktech",
            "whatisthisimnotgoodwithcomputers": "witingwc",
        }

        for extr in extractor.extractors():
            category = mapping.get(extr.category, extr.category)
            if not category:
                continue
            expected = "{}{}Extractor".format(
                category.capitalize(), extr.subcategory.capitalize())
            self.assertEqual(expected, extr.__name__)
Пример #18
0
def main():
    """Run a HashJob for each requested URL and print its test data

    Command line:
        --content   passed on to job.HashJob as 'content'
        --recreate  treat the positional arguments as category names and
                    use the test URLs of all extractors in those categories
        urls        URLs (or category names when --recreate is given)

    For every URL the extractor class name is printed, followed by a line
    built from TESTDATA_FMT with the URL and the three hash digests.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if args.recreate:
        urls = [
            test[0] for extr in extractor.extractors()
            if extr.category in args.urls
            # an extractor may lack 'test' or have a falsy value for it;
            # treat both as "no test cases" instead of raising
            for test in (getattr(extr, "test", None) or ())
        ]
    else:
        urls = args.urls

    config.load()
    for url in urls:
        hjob = job.HashJob(url, content=args.content)
        hjob.run()
        print(hjob.extractor.__class__.__name__)
        print(
            TESTDATA_FMT.format(url, hjob.hash_url.hexdigest(),
                                hjob.hash_keyword.hexdigest(),
                                hjob.hash_content.hexdigest()))
Пример #19
0
        hjob.run()
        if "url" in result:
            self.assertEqual(hjob.hash_url.hexdigest(), result["url"])
        if "keyword" in result:
            self.assertEqual(hjob.hash_keyword.hexdigest(), result["keyword"])
        if "content" in result:
            self.assertEqual(hjob.hash_content.hexdigest(), result["content"])


def generate_test(extr):
    """Return a test method that runs all test cases of 'extr'

    The returned function prints the extractor's name, then prints each
    test URL and passes (extr, url, result) to self.run_test().
    """
    def test(self):
        print(extr.__name__)
        for case in extr.test:
            url, result = case
            print(url)
            self.run_test(extr, url, result)
    return test


if __name__ == '__main__':
    import sys
    extractors = extractor.extractors()
    # with arguments, keep only extractors whose category was named
    if len(sys.argv) > 1:
        extractors = [
            extr for extr in extractors if extr.category in sys.argv
        ]
    # attach one generated method per extractor that has a 'test' attribute
    for extr in extractors:
        if not hasattr(extr, "test"):
            continue
        name = "test_" + extr.__name__
        test = generate_test(extr)
        setattr(TestExtractors, name, test)
    # remove the arguments before calling unittest.main()
    del sys.argv[1:]
    unittest.main(warnings='ignore')
Пример #20
0
    # don't work on travis-ci
    "exhentai",
    "kissmanga",
    "mangafox",
    "dynastyscans",
    "nijie",
    "archivedmoe",
    "archiveofsins",
    "thebarchive",
    # temporary issues
    "turboimagehost",
]
# enable selective testing for direct calls:
# when run as a script with arguments, 'all' selects every extractor;
# otherwise an extractor is selected if its category, or its basecategory
# (when it has one), appears in sys.argv
if __name__ == '__main__' and len(sys.argv) > 1:
    if sys.argv[1].lower() == "all":
        extractors = extractor.extractors()
    else:
        extractors = [
            extr for extr in extractor.extractors()
            if extr.category in sys.argv
            # 'and' binds tighter than 'or': the hasattr() check only
            # guards the basecategory lookup
            or hasattr(extr, "basecategory") and extr.basecategory in sys.argv
        ]
    # remove the selection arguments from sys.argv
    del sys.argv[1:]
else:
    # default: everything except the categories listed in 'skip'
    extractors = [
        extr for extr in extractor.extractors() if extr.category not in skip
    ]

for extr in extractors:
    # only extractors with a non-empty 'test' attribute are considered
    if hasattr(extr, "test") and extr.test:
        # name prefix derived from the extractor class name
        name = "test_" + extr.__name__ + "_"
Пример #21
0
#!/usr/bin/env python

import sys
import os.path
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import extractor, job, config

# for every extractor with a 'test' attribute, collect the URLs of
# those test cases whose second element is truthy
tests = [
    ([case[0] for case in extr.test if case[1]], extr)
    for extr in extractor.extractors()
    if hasattr(extr, "test")
]

# with arguments, keep only extractors whose category was named
if len(sys.argv) > 1:
    tests = [entry for entry in tests if entry[1].category in sys.argv]

# output directory named after today's date
path = os.path.join(ROOTDIR, "archive/testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)
config.load()

# run a DataJob per URL, writing into one file per test case
for urls, extr in tests:
    for i, url in enumerate(urls):
        name = "%s-%s-%d.json" % (extr.category, extr.subcategory, i)
        print(name)
        target = os.path.join(path, name)
        with open(target, "w") as outfile:
            data_job = job.DataJob(url, file=outfile)
            data_job.run()
Пример #22
0
# dynamically generate tests
def _generate_test(extr, tcase):
    def test(self):
        url, result = tcase
        print("\n", url, sep="")
        self._run_test(extr, url, result)

    return test


# enable selective testing for direct calls:
# with arguments, only extractors whose category appears in sys.argv are
# used; otherwise everything except the categories in 'skip'
skip = ["exhentai", "kissmanga", "mangafox"]
if __name__ == '__main__' and len(sys.argv) > 1:
    extractors = [
        extr
        for extr in extractor.extractors()
        if extr.category in sys.argv
    ]
    # remove the selection arguments from sys.argv
    del sys.argv[1:]
else:
    extractors = [
        extr
        for extr in extractor.extractors()
        if extr.category not in skip
    ]

# attach one 'test_<ClassName>_<N>' method per test case to TestExtractors
for extr in extractors:
    if not (hasattr(extr, "test") and extr.test):
        continue
    name = "test_" + extr.__name__ + "_"
    for num, tcase in enumerate(extr.test, 1):
        test = _generate_test(extr, tcase)
        test.__name__ = name + str(num)
        setattr(TestExtractors, test.__name__, test)
        # remove the module-level name 'test' again
        del test
Пример #23
0
def _get_extractor(category):
    """Return an extractor object for 'category' that has '_login_impl'

    The first extractor class whose category equals 'category' and which
    has a '_login_impl' attribute is used; the result is whatever its
    from_url() returns for the URL of its first test case.
    Returns None if no such class exists.
    """
    for extr in extractor.extractors():
        if extr.category != category or not hasattr(extr, "_login_impl"):
            continue
        first_case = next(extr._get_tests())
        return extr.from_url(first_case[0])
    return None
Пример #24
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Collect results of extractor unit tests"""

import sys
import os.path
import datetime

import util
from gallery_dl import extractor, job, config
from test.test_results import setup_test_config

# filter test cases:
# build (index, extractor, url, result) tuples for all extractors with a
# non-empty 'test' attribute, restricted to the categories named in
# sys.argv when arguments were given; test cases with a falsy result
# are dropped

tests = [(idx, extr, url, result) for extr in extractor.extractors()
         if hasattr(extr, "test") and extr.test
         if len(sys.argv) <= 1 or extr.category in sys.argv
         for idx, (url, result) in enumerate(extr._get_tests()) if result]

# setup target directory (named after today's date)

path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)

for idx, extr, url, result in tests:

    # filename: '<category>-<subcategory>-<index>.json'
    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
    print(name)

    # config values
Пример #25
0

# dynamically generate tests
def _generate_test(extr, tcase):
    def test(self):
        url, result = tcase
        print("\n", url, sep="")
        self._run_test(extr, url, result)
    return test


# enable selective testing for direct calls:
# with arguments, only extractors whose category appears in sys.argv are
# used; otherwise everything except the categories in 'skip'
skip = ["exhentai", "kissmanga", "mangafox", "mangashare", "readcomiconline"]
if __name__ == '__main__' and len(sys.argv) > 1:
    extractors = [
        extr for extr in extractor.extractors()
        if extr.category in sys.argv
    ]
    # remove the selection arguments from sys.argv
    del sys.argv[1:]
else:
    extractors = [
        extr for extr in extractor.extractors()
        if extr.category not in skip
    ]


for extr in extractors:
    # only extractors with a non-empty 'test' attribute are considered
    if hasattr(extr, "test") and extr.test:
        # name prefix derived from the extractor class name
        name = "test_" + extr.__name__ + "_"
        # test cases are numbered starting at 1
        for num, tcase in enumerate(extr.test, 1):
            test = _generate_test(extr, tcase)
Пример #26
0
import sys
import os.path
import datetime

import util
from gallery_dl import extractor, job, config
from test.test_results import setup_test_config


# filter test cases:
# build (index, extractor, url, result) tuples

tests = [
    (idx, extr, url, result)

    # extractors with a non-empty 'test' attribute, restricted to the
    # categories named in sys.argv when arguments were given
    for extr in extractor.extractors()
    if hasattr(extr, "test") and extr.test
    if len(sys.argv) <= 1 or extr.category in sys.argv

    # test cases with a falsy result are dropped
    for idx, (url, result) in enumerate(extr._get_tests())
    if result
]


# setup target directory (named after today's date)

path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)


for idx, extr, url, result in tests: