Exemplos de RegexrClass em Python, exemplos de newsline.apps.web.newsworm.core.regexr.RegexrClass em Python

Exemplo n.º 1

0

Exibir arquivo

    def test_regexr_split(self):
        """Check that ``RegexrClass.split`` tokenizes a sample URL path.

        The sample string is split and the result is compared against the
        expected alternating sequence of delimiters and words.  The outcome
        is reported through ``self.print_success`` / ``self.print_failure``.
        """
        regexr = RegexrClass()
        string = '/article/page_2.html?param=1'
        expected = [
            "/", "article", "/", "page", "_", "2",
            ".", "html", "?", "param", "=", "1",
        ]
        print("String to be split: %s" % string)
        split_string = regexr.split(string)
        assert isinstance(split_string, list)
        print("Results: ")
        print(split_string)

        # Compare the whole list at once: a result of the wrong length is
        # now reported as a failure instead of being silently ignored.
        if split_string == expected:
            self.print_success("OK!")
        else:
            self.print_failure("Test failed.")

Exemplo n.º 2

0

Exibir arquivo

    def isUrlTest(self):
        """Run ``RegexrClass.is_url`` over sample strings and print one line per sample."""
        checker = RegexrClass()
        candidates = ["/url", "http://www.url.com", "www.url.com", "#"]

        for candidate in candidates:
            if not checker.is_url(candidate):
                self.print_failure("Did not match %s" % candidate)
                continue
            self.print_success("Matched %s" % candidate)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: helpers.py Projeto: derrandz/python-news-crawler

def has_http_prefix(url):
    """Return True when *url* begins with the literal prefix ``http://``.

    The previous implementation searched for ``http://`` anywhere in the
    string, so a path such as ``/link/http://host`` was wrongly reported as
    having the prefix.  A plain prefix check also avoids the invalid
    ``\\/`` escape sequences of the old non-raw regex literal.

    Args:
        url: the string to inspect.

    Returns:
        bool: True only if the string starts with ``http://``.
    """
    return url.startswith('http://')

Exemplo n.º 4

0

Exibir arquivo

Arquivo: helpers.py Projeto: derrandz/python-news-crawler

 def _is_url(url, root):
     """Return True if *url* passes the regexr check selected by *root*.

     When *root* is truthy the check is ``RegexrClass.is_rooturl``;
     otherwise it is ``RegexrClass.is_url``.  The result is always a
     plain ``True`` or ``False``.
     """
     checker = RegexrClass()
     predicate = checker.is_rooturl if root else checker.is_url
     return True if predicate(url) else False

Exemplo n.º 5

0

Exibir arquivo

    def SpecialCharsTest(self):
        """Compare ``RegexrClass.special_chars`` on a sample string with the expected list and print the outcome."""
        extractor = RegexrClass()
        expected = ["/", "?", "-"]
        sample = "/hey?you-"

        self.print_info("Trying to return the special characters in %s" % sample)
        found = extractor.special_chars(sample)
        matched = found == expected

        if not matched:
            # Show what was actually returned before flagging the failure.
            self.print_failure("returned %s" % found)
            self.print_failure("FAILED!")
            return

        self.print_success("returned %s" % expected)
        self.print_success("OK!")

Exemplo n.º 6

0

Exibir arquivo

			def __init__(self, rooturl=None, domitems=None):
				"""Store a helper regexr and the root URL, then run *domitems* through the normalization phase."""
				# An empty list is passed explicitly to indicate that this
				# regexr instance will be used as a helper only.
				self.regexr = RegexrClass([])
				self.rooturl = rooturl

				# Normalization phase: the order of the stages is very
				# important (validate -> normalize -> decode -> clean).
				validated = self.validate(domitems)
				normalized = self.normalize(validated)
				decoded = self.decode(normalized)
				self.domitems = self.clean(decoded)

Exemplo n.º 7

0

Exibir arquivo

    def splitTest(self):
        """Print the output of ``RegexrClass.split`` for two samples.

        The first sample uses a single-argument call; the second passes a
        list of delimiters as the second argument.  Each result is asserted
        to be a list.
        """
        splitter = RegexrClass()

        # Case 1: single-argument call.
        # (An alternative non-ASCII sample, "/topics/آش-واقع", is left disabled.)
        sample = "www.alyaoum24.com/news/sport"
        print("String to be split: %s" % sample)
        pieces = splitter.split(sample)
        assert isinstance(pieces, list)
        print("Results: ")
        print(pieces)

        # Case 2: same call with a list of delimiters passed as second argument.
        sample = "maing?#asdokasd/okao_sdka"
        ignored = ["?", "#"]
        banner = "String to be split: %s with ignored delimieters %s" % (sample, ignored)
        print(banner)
        pieces = splitter.split(sample, ignored)
        assert isinstance(pieces, list)
        print("Results: ")
        print(pieces)

Exemplo n.º 8

0

Exibir arquivo

        def _patternize_test_smart(url_examples, matcheables, unmatcheables):
            """Build a RegexrClass from *url_examples* and run the match checks.

            Relies on ``self`` and ``_match_smart`` from the enclosing scope.
            If constructing the RegexrClass raises, the failure is printed
            and the same exception is re-raised.
            """
            self.print_seperator()

            try:
                extractor = RegexrClass(url_examples)
            except Exception as err:
                self.print_failure("Test failed with %s" % str(err))
                raise err

            # Reached only when construction succeeded.
            print("The provided urls : %s" % url_examples)
            print("The extracted pattern is : %s" % extractor.pattern)

            self.print_info("\nThese tests should all succeed!\n")
            _match_smart(extractor, matcheables)

            self.print_info("\nThese tests should all fail!\n")
            _match_smart(extractor, unmatcheables)

            self.print_seperator()

Exemplo n.º 9

0

Exibir arquivo

    def test_remove_double_slash(self):
        """Apply ``RegexrClass.remove_double_slash`` to sample inputs and print each comparison with its expected value."""
        examples = [
            "//alink",
            "//alink//",
            "/alink/",
            "alink//",
            "///link///link",
            "http://link//linkk",
            "/link/http://link/link",
        ]
        results = [
            "/alink",
            "/alink",
            "/alink",
            "alink",
            "/link/link",
            "http://link/linkk",
            "/link/http://link/link",
        ]

        cleaner = RegexrClass()
        strip_slashes = cleaner.remove_double_slash
        # Compute every actual value up front, before any reporting starts.
        actual = [strip_slashes(example) for example in examples]

        for supplied, expected, got in zip(examples, results, actual):
            summary = "Arg Supplied: %s, Expected: %s, Result: %s" % (
                supplied, expected, got)
            self.print_with_color("BOLD", summary)
            if expected == got:
                self.print_success("OK!")
            else:
                self.print_failure("FAILED!")

Exemplo n.º 10

0

Exibir arquivo

			def __init__(self, rooturl=None, domitems=None):
				"""Store a RegexrClass helper and the root URL, then keep the normalized *domitems* in ``self.temp``."""
				self.regexr, self.rooturl = RegexrClass(), rooturl

				normalized = self.normalize(domitems)
				self.temp = normalized

Exemplo n.º 11

0

Exibir arquivo

			def __init__(self, rooturl=None, domitems=None):
				"""Store a RegexrClass helper and the root URL, process *domitems*, then call ``patternize``."""
				self.regexr = RegexrClass()
				self.rooturl = rooturl

				# Stages are applied innermost-first: validate, normalize,
				# decode, clean.
				stages = (self.clean, self.decode, self.normalize, self.validate)
				items = domitems
				for stage in reversed(stages):
					items = stage(items)
				self.domitems = items

				self.patternize()