Пример #1
0
class Hose:
    """Walk a set of base URLs and classify every file URL found.

    This class should be instantiated with a sequence of filter patterns,
    each exposing a ``base_url`` attribute.  It uses a walker to
    recursively descend each base URL and passes every file URL found
    through the filter.

    It can be used as an iterator to yield (key, url) for each URL, where
    key is the value returned by the filter's ``check`` method for that
    URL (None if none matched).
    """

    def __init__(self, filters=(), log_parent=None):
        """Set up the logger, the filter and the reduced list of base URLs.

        :param filters: sequence of pattern objects; each must have a
            ``base_url`` attribute.
        :param log_parent: optional parent passed to ``log.get_logger``.
        """
        self.log = log.get_logger("Hose", log_parent)
        self.filter = Filter(filters, log_parent=self.log)
        self.urls = self.reduceWork([pattern.base_url for pattern in filters])

    @staticmethod
    def _isWithin(url, parent_url):
        """Return True if url equals parent_url or lies beneath it.

        The comparison respects path boundaries: "http://host/foobar" is
        not beneath "http://host/foo", whereas "http://host/foo/bar" is.
        """
        if url == parent_url:
            return True
        if not parent_url.endswith("/"):
            parent_url += "/"
        return url.startswith(parent_url)

    def reduceWork(self, url_list):
        """Simplify URL list to remove children of other elements.

        Reduces the amount of work we need to do by removing any URL from
        the list whose parent also appears in the list.  Returns the
        reduced list; the input is not modified.

        A URL only counts as a child when it equals the other URL or
        continues it across a "/" boundary, so sibling directories that
        merely share a name prefix are both kept.
        """
        self.log.info("Reducing URL list.")
        candidates = list(url_list)
        kept = []
        for index, url in enumerate(candidates):
            # Compare against everything already kept plus everything not
            # yet examined; of two identical URLs the later one survives.
            for check_url in kept + candidates[index + 1:]:
                if self._isWithin(url, check_url):
                    self.log.debug("Discarding %s as have %s", url, check_url)
                    break
            else:
                kept.append(url)

        return kept

    def run(self):
        """Run over the URL list.

        Generator yielding (key, url) for every file found beneath each
        base URL, where key is the result of the filter's ``check``.
        Directories the filter reports as not being a possible parent are
        removed from the walk.
        """
        self.log.info("Identifying URLs")
        for base_url in self.urls:
            for dirpath, dirnames, filenames in walk(base_url, self.log):
                for filename in filenames:
                    url = combine_url(base_url, dirpath, filename)
                    key = self.filter.check(url)
                    yield (key, url)
                # To affect which directories the walker descends
                # into, we must update the dirnames list in place.
                wanted = []
                for dirname in dirnames:
                    url = combine_url(base_url, dirpath, dirname)
                    if self.filter.isPossibleParent(url):
                        wanted.append(dirname)
                    else:
                        self.log.info('Skipping %s', url)
                dirnames[:] = wanted

    __iter__ = run
Пример #2
0
class Hose:
    """Walk a set of base URLs and classify every file URL found.

    This class should be instantiated with a sequence of filter patterns,
    each exposing a ``base_url`` attribute.  It uses a walker to
    recursively descend each base URL and passes every file URL found
    through the filter.

    It can be used as an iterator to yield (key, url) for each URL, where
    key is the value returned by the filter's ``check`` method for that
    URL (None if none matched).
    """
    def __init__(self, filters=(), log_parent=None):
        """Set up the logger, the filter and the reduced list of base URLs.

        :param filters: sequence of pattern objects; each must have a
            ``base_url`` attribute.
        :param log_parent: optional parent passed to ``log.get_logger``.
        """
        self.log = log.get_logger("Hose", log_parent)
        self.filter = Filter(filters, log_parent=self.log)
        self.urls = self.reduceWork([pattern.base_url for pattern in filters])

    @staticmethod
    def _isWithin(url, parent_url):
        """Return True if url equals parent_url or lies beneath it.

        The comparison respects path boundaries: "http://host/foobar" is
        not beneath "http://host/foo", whereas "http://host/foo/bar" is.
        """
        if url == parent_url:
            return True
        if not parent_url.endswith("/"):
            parent_url += "/"
        return url.startswith(parent_url)

    def reduceWork(self, url_list):
        """Simplify URL list to remove children of other elements.

        Reduces the amount of work we need to do by removing any URL from
        the list whose parent also appears in the list.  Returns the
        reduced list; the input is not modified.

        A URL only counts as a child when it equals the other URL or
        continues it across a "/" boundary, so sibling directories that
        merely share a name prefix are both kept.
        """
        self.log.info("Reducing URL list.")
        candidates = list(url_list)
        kept = []
        for index, url in enumerate(candidates):
            # Compare against everything already kept plus everything not
            # yet examined; of two identical URLs the later one survives.
            for check_url in kept + candidates[index + 1:]:
                if self._isWithin(url, check_url):
                    self.log.debug("Discarding %s as have %s", url, check_url)
                    break
            else:
                kept.append(url)

        return kept

    def run(self):
        """Run over the URL list.

        Generator yielding (key, url) for every file found beneath each
        base URL, where key is the result of the filter's ``check``.
        Directories the filter reports as not being a possible parent are
        removed from the walk.
        """
        self.log.info("Identifying URLs")
        for base_url in self.urls:
            for dirpath, dirnames, filenames in walk(base_url, self.log):
                for filename in filenames:
                    url = combine_url(base_url, dirpath, filename)
                    key = self.filter.check(url)
                    yield (key, url)
                # To affect which directories the walker descends
                # into, we must update the dirnames list in place.
                wanted = []
                for dirname in dirnames:
                    url = combine_url(base_url, dirpath, dirname)
                    if self.filter.isPossibleParent(url):
                        wanted.append(dirname)
                    else:
                        self.log.info('Skipping %s', url)
                dirnames[:] = wanted

    __iter__ = run
Пример #3
0
 def testCreatesChildLogger(self):
     """Filter creates a child logger if given a parent."""
     from logging import getLogger

     from lp.registry.scripts.productreleasefinder.filter import Filter

     parent = getLogger("foo")
     f = Filter(log_parent=parent)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(f.log.parent, parent)
Пример #4
0
 def __init__(self, filters=(), log_parent=None):
     """Create the logger and filter, then compute the URLs to walk.

     The logger comes from ``log.get_logger`` under the name "Hose"; the
     filter is built from ``filters`` and logs beneath that logger.  The
     ``base_url`` of every pattern is collected and passed through
     ``reduceWork``; the result is stored as ``self.urls``.
     """
     hose_log = log.get_logger("Hose", log_parent)
     self.log = hose_log
     self.filter = Filter(filters, log_parent=hose_log)
     base_urls = []
     for pattern in filters:
         base_urls.append(pattern.base_url)
     self.urls = self.reduceWork(base_urls)
Пример #5
0
 def testCreatesDefaultLogger(self):
     """Filter creates a default logger."""
     from logging import Logger

     from lp.registry.scripts.productreleasefinder.filter import Filter

     f = Filter()
     # assertIsInstance replaces the deprecated failUnless alias (removed
     # from unittest in Python 3.12) and reports the actual type on failure.
     self.assertIsInstance(f.log, Logger)
Пример #6
0
 def makeFilter(self, key, urlglob):
     """Return a Filter holding a single FilterPattern(key, urlglob)."""
     from lp.registry.scripts.productreleasefinder import filter as _filter
     return _filter.Filter([_filter.FilterPattern(key, urlglob)])
Пример #7
0
 def testNoFilters(self):
     """Filter.check returns None if there are no filters."""
     from lp.registry.scripts.productreleasefinder.filter import Filter

     f = Filter()
     # assertIsNone replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertIsNone(f.check("file:///subdir/file"))
Пример #8
0
 def testFiltersPropertyGiven(self):
     """Filter constructor accepts argument to set filters property."""
     from lp.registry.scripts.productreleasefinder.filter import Filter

     f = Filter(["wibble"])
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(len(f.filters), 1)
     self.assertEqual(f.filters[0], "wibble")
Пример #9
0
 def testDefaultFiltersProperty(self):
     """Filter constructor initializes filters property to empty list."""
     from lp.registry.scripts.productreleasefinder.filter import Filter

     f = Filter()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(f.filters, [])
Пример #10
0
 def __init__(self, filters=(), log_parent=None):
     """Initialise logging, filtering and the reduced base-URL list.

     ``self.log`` is obtained from ``log.get_logger`` with the name "Hose",
     ``self.filter`` wraps ``filters`` and logs beneath it, and
     ``self.urls`` is what ``reduceWork`` returns for the patterns'
     ``base_url`` values.
     """
     self.log = log.get_logger("Hose", log_parent)
     self.filter = Filter(filters, log_parent=self.log)
     pattern_urls = list(map(lambda pattern: pattern.base_url, filters))
     self.urls = self.reduceWork(pattern_urls)
Пример #11
0
 def testNoFilters(self):
     """Filter.check returns None if there are no filters."""
     from lp.registry.scripts.productreleasefinder.filter import Filter

     f = Filter()
     # assertIsNone replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertIsNone(f.check("file:///subdir/file"))