Exemplo n.º 1
0
    def fetch(self):
        """Download the mailing list's archives and yield them one by one.

        The links are scraped from ``self.mailing_list.location`` with
        ``MyHTMLParser`` and narrowed down by ``self.filter_links``. For
        every remaining link the local target is the link's basename inside
        ``mailing_list.compressed_dir``.

        A link is downloaded when ``find_current_month`` returns a truthy
        value for it, when no local copy exists, or when ``self.force`` is
        set. Otherwise the existing local file is reused. Links whose
        processing raises ``IOError`` are reported and skipped.

        NOTE(review): archive indexes usually list the newest archive
        first. This method does not reorder the links itself; confirm
        whether ``filter_links`` (or the caller) is expected to reverse
        them into chronological order.

        Yields:
            MBoxArchive: built from the local file path and the source
            link, for every link that was not skipped.
        """
        ml = self.mailing_list
        parser = MyHTMLParser(ml.location, self.web_user, self.web_password)

        for url in self.filter_links(parser.get_links()):
            target = os.path.join(ml.compressed_dir, os.path.basename(url))

            try:
                # The current month's archive may still be growing, so it
                # is always fetched again regardless of any local copy.
                month_marker = find_current_month(url)
                is_current = bool(month_marker)

                if is_current:
                    self._print_output(
                        'Current month detected: '
                        'Found substring %s in URL %s...'
                        % (month_marker, url))

                # Reuse the local copy only for past months, and only when
                # a forced re-download was not requested.
                reuse_local = (not is_current
                               and os.path.exists(target)
                               and not self.force)

                if reuse_local:
                    self._print_output('Already downloaded %s' % url)
                else:
                    self._print_output('Retrieving %s...' % url)
                    self._retrieve_remote_file(url, target)
            except IOError:
                self._print_output("Unknown URL: " + url + ". Skipping.")
            else:
                # Reached only when the try body finished without IOError.
                yield MBoxArchive(target, url)
Exemplo n.º 2
0
    def fetch(self):
        """Generate an ``MBoxArchive`` for each archive link of the list.

        Links come from ``MyHTMLParser(...).get_links()`` for the mailing
        list's location, filtered through ``self.filter_links``. Each link
        maps to a local file named after the link's basename under
        ``mailing_list.compressed_dir``.

        Download policy per link:

        * ``find_current_month(link)`` is truthy: always download.
        * otherwise download unless the local file already exists and
          ``self.force`` is false.

        An ``IOError`` raised while handling a link is reported through
        ``self._print_output`` and that link produces no archive.

        NOTE(review): the links are processed in the order returned by
        ``filter_links``; nothing here reverses them. Verify the ordering
        if chronological processing is required.

        Yields:
            MBoxArchive: constructed with the local destination path and
            the originating link.
        """
        mlist = self.mailing_list
        page = MyHTMLParser(mlist.location, self.web_user, self.web_password)
        candidates = self.filter_links(page.get_links())

        for href in candidates:
            filename = os.path.basename(href)
            local_path = os.path.join(mlist.compressed_dir, filename)

            try:
                current = find_current_month(href)

                if current:
                    # Current-month archives are always retrieved again.
                    self._print_output('Current month detected: '
                                       'Found substring %s in URL %s...' %
                                       (current, href))
                    must_download = True
                else:
                    # Negation of "already on disk and not forced"; the
                    # filesystem check still runs before reading the flag.
                    must_download = (not os.path.exists(local_path)
                                     or self.force)

                if must_download:
                    self._print_output('Retrieving %s...' % href)
                    self._retrieve_remote_file(href, local_path)
                else:
                    self._print_output('Already downloaded %s' % href)
            except IOError:
                self._print_output("Unknown URL: " + href + ". Skipping.")
                continue

            yield MBoxArchive(local_path, href)