Пример #1
0
    def search_findfiles(self, filename):
        """ Search http://www.findfiles.com for mirrors of the given file.

        Fetches the search page for 'filename', narrows it down to the
        first '<table' ... '</table>' section found after the page heading
        and passes that section to self.make_urls().

        Returns the result of self.make_urls(), or an empty list when the
        expected page section or table cannot be located. """

        print('Searching http://www.findfiles.com for mirrors of file %s...'
              % filename)

        # Note: both grammars could change if the site changes its templates
        # Text from the first "<h1" up to the words "Advanced Search"
        section_grammar = Literal("<h1") + SkipTo(Literal("Advanced Search"))
        # The href attribute of an anchor tag, up to the closing ">"
        link_grammar = Literal("<a") + Literal("href") + Literal("=") + SkipTo(
            Literal(">"))

        search_url = self.sites['findfiles'][0] % filename

        fetcher = connector.HarvestManUrlConnector()
        page = fetcher.get_url_data(search_url)

        hits = list(section_grammar.scanString(page))
        if not hits:
            return []

        # Only the first hit is used; the last token of its parse
        # result is the text consumed by SkipTo
        section = hits[0][0][-1]

        start = section.find('<table')
        if start == -1:
            return []

        end = section.find('</table>', start)
        if end == -1:
            return []

        # Extend the slice by 8 characters so it includes '</table>' itself
        table_html = section[start:end + 8]
        return self.make_urls(link_grammar, table_html, filename)
Пример #2
0
    def search_filewatcher(self, filename):
        """ Search the site registered as 'filewatcher' for mirrors of
        the given file.

        The search URL is built from the template stored at
        self.sites['filewatcher'][0], the page is fetched and the whole
        page data is handed to self.make_urls() together with the link
        grammar.

        Returns whatever self.make_urls() returns. """

        # Note: this grammar could change if the site changes its templates
        # Matches '<p><big><a href=' followed by everything up to the next '>'
        grammar = (Literal("<p>") + Literal("<big>") + Literal("<a") +
                   Literal("href") + Literal("=") + SkipTo(Literal(">")))

        search_url = self.sites['filewatcher'][0] % filename

        conn = connector.HarvestManUrlConnector()
        data = conn.get_url_data(search_url)

        return self.make_urls(grammar, data, filename)
Пример #3
0
    def grab_url(self, url, filename=None):
        """ Download the given URL and save it to the (optional) filename.

        Before downloading, the progress object, the thread pool's
        multipart data, the monitor and the mirror manager are reset.

        Returns HGET_DOWNLOAD_OK once the download call has returned,
        HGET_DOWNLOAD_ERROR when the URL cannot be parsed, and
        HGET_KEYBOARD_INTERRUPT when the user interrupts the download. """

        # If a filename is given, set outfile to it
        if filename:
            objects.config.hgetoutfile = filename

        # We need to reset some counters and
        # data structures ...

        # Reset progress object
        objects.config.reset_progress()
        # Reset thread pool, multipart status
        self._pool.reset_multipart_data()
        # Reset monitor
        self._monitor.reset()
        # Reset mirror manager
        mirrormgr = mirrors.HarvestManMirrorManager.getInstance()
        mirrormgr.reset()

        # Bind these before entering the try block so that the
        # KeyboardInterrupt handler can always refer to them, even when
        # the interrupt arrives while the connector is being created.
        conn = None
        urlobj = None

        try:
            conn = connector.HarvestManUrlConnector()

            try:
                print('\nDownloading URL %s ...' % (url,))
                urlobj = urlparser.HarvestManUrl(url)
                # NOTE(review): the return value of url_to_file() is not
                # inspected, so HGET_DOWNLOAD_OK is returned whenever the
                # call does not raise - confirm this is intended.
                conn.url_to_file(urlobj)

                if urlobj.trymultipart and mirrormgr.used:
                    # Print stats if mirrors were used...
                    mirrormgr.print_stats()

                return HGET_DOWNLOAD_OK
            except urlparser.HarvestManUrlError as e:
                print(str(e))
                print('Error: Invalid URL "%s"' % url)

                return HGET_DOWNLOAD_ERROR

        except KeyboardInterrupt:
            print('Caught keyboard interrupt...')
            # Clean up only if the URL object was actually created
            if urlobj:
                self.clean_up(conn, urlobj)

            return HGET_KEYBOARD_INTERRUPT
Пример #4
0
    def calculate_bandwidth(self):
        """ Calculate bandwidth of the user by downloading a specific URL and timing it,
        setting a limit on maximum file size.

        When no 'harvestman.conf' file exists in the user configuration
        directory, the bandwidth is obtained from the connector's
        calc_bandwidth() and, if it is non-zero, written to that file as
        a single 'bandwidth=<value>' line. """

        # Calculate bandwidth
        bw = 0
        # Look for harvestman.conf in user conf dir
        conf = os.path.join(objects.config.userconfdir, 'harvestman.conf')
        if not os.path.isfile(conf):
            # No config file yet: measure the bandwidth using a fixed URL
            conn = connector.HarvestManUrlConnector()
            urlobj = urlparser.HarvestManUrl(
                'http://harvestmanontheweb.com/schemas/HarvestMan.xsd')
            bw = conn.calc_bandwidth(urlobj)
            bwstr = 'bandwidth=%f\n' % bw
            # Only a non-zero measurement is written to the file
            if bw:
                try:
                    # NOTE(review): the file object is not closed explicitly
                    open(conf, 'w').write(bwstr)
                except IOError, e:
                    # A failure to write the file is deliberately ignored
                    pass