Example #1
    def _batch_download(self, uris, local_path=None, throttle=0):
        """Downloads packages from the specified uris. This is a W.I.P!!!

        Args:
            uris (list of strings) - Uris of the packages to download.
            local_path (string) - Full path where the package is to be saved.
                Do not include a file name.
            throttle (int) - Number of kilobytes to throttle the bandwidth by.
                If throttle == 0, throttling is disabled.

        Returns:
            True if package downloaded successfully. False otherwise.
        """

        success = False

        if throttle != 0:
            throttle *= 1024

        for uri in uris:
            try:
                if local_path:
                    # Derive the file name from the uri, dropping any query string
                    name = uri.split('/')[-1]
                    if '?' in name:
                        name = name.split('?')[0]

                    path = os.path.join(local_path, name)
                    urlgrab(uri, filename=path, throttle=throttle)

                else:
                    urlgrab(uri, throttle=throttle)

                success = True
            except Exception as e:
                logger.exception(e)
                success = False

        return success
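For reference, a minimal standalone sketch of the same pattern with plain urlgrabber; the URI, destination directory, and throttle value below are placeholders, and the kilobyte-to-byte conversion mirrors the method above.

import os
from urlgrabber import urlgrab

uri = 'http://example.com/packages/foo-1.0.rpm?sig=abc'  # placeholder URI
local_path = '/tmp/packages'                             # placeholder destination directory
throttle_kb = 256                                        # 0 disables throttling

# Derive the file name from the URI, dropping any query string
name = uri.split('/')[-1].split('?')[0]
path = os.path.join(local_path, name)

# urlgrabber expects the throttle value in bytes per second
urlgrab(uri, filename=path, throttle=throttle_kb * 1024)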
Example #2
   def run(self,force=False):
       """
       Download bootloader content for all of the latest bootloaders, since the user
       has chosen not to supply their own.  You may ask "why not get this from yum?",
       but Fedora has no IA64 repo, for instance, we also want this to work on Debian,
       and we do not want folks to have to install a cross compiler.  Those who dislike
       this approach can still source their cross-arch bootloader content manually.
       """

       content_server = "http://mdehaan.fedorapeople.org/loaders"
       dest = "/var/lib/cobbler/loaders"

       files = (
          ( "%s/README" % content_server, "%s/README" % dest ),
          ( "%s/COPYING.elilo" % content_server, "%s/COPYING.elilo" % dest ),
          ( "%s/COPYING.yaboot" % content_server, "%s/COPYING.yaboot" % dest),
          ( "%s/COPYING.syslinux" % content_server, "%s/COPYING.syslinux" % dest),
          ( "%s/elilo-3.8-ia64.efi" % content_server, "%s/elilo-ia64.efi" % dest ),
          ( "%s/yaboot-1.3.14-12" % content_server, "%s/yaboot" % dest),
          ( "%s/pxelinux.0-3.61" % content_server, "%s/pxelinux.0" % dest),
          ( "%s/menu.c32-3.61" % content_server, "%s/menu.c32" % dest),
       )

       self.logger.info("downloading content required to netboot all arches")
       for f in files:
          src = f[0]
          dst = f[1]
          if os.path.exists(dst) and not force:
             self.logger.info("path %s already exists, not overwriting existing content, use --force if you wish to update" % dst)
             continue
          self.logger.info("downloading %s to %s" % (src,dst))
          urlgrabber.urlgrab(src,dst)

       return True
Example #3
    def fetch(self):
        """Return value: Fetched file's full path."""

        # import urlgrabber module
        try:
            import urlgrabber
        except ImportError:
            raise FetchError(_('Urlgrabber needs to be installed to run this command'))

        if not self.url.filename():
            raise FetchError(_('Filename error'))

        if not os.access(self.destdir, os.W_OK):
            raise FetchError(_('Access denied to write to destination directory: "%s"') % (self.destdir))

        if os.path.exists(self.archive_file) and not os.access(self.archive_file, os.W_OK):
            raise FetchError(_('Access denied to destination file: "%s"') % (self.archive_file))

        try:
            urlgrabber.urlgrab(self.url.get_uri(),
                               self.partial_file,
                               progress_obj=UIHandler(self.progress),
                               http_headers=self._get_http_headers(),
                               ftp_headers=self._get_ftp_headers(),
                               proxies=self._get_proxies(),
                               throttle=self._get_bandwith_limit(),
                               reget=self._test_range_support(),
                               user_agent='PiSi Fetcher/' + pisi.__version__)
        except urlgrabber.grabber.URLGrabError, e:
            raise FetchError(_('Could not fetch destination file "%s": %s') % (self.archive_file, e))
Example #4
def fetchHTMLFiles(clubDict, league, season='2016'):
    # create base HTML directory
    dir = os.path.dirname(baseDirname)
    if not os.path.exists(dir):
        os.makedirs(dir)

    # create league directory inside HTML directory
    dir = os.path.dirname(baseDirname + league + '/')
    if not os.path.exists(dir):
        os.makedirs(dir)

    # create season directory inside league directory
    dir = os.path.dirname(baseDirname + league + '/' + season + '/')
    if not os.path.exists(dir):
        os.makedirs(dir)

    print "[File Conqueror]  Getting HTML for league: %s\tseason: %s" % (league, season)

    url = constants.urls[league]['baseUrl'] + constants.urls[league]['urlPrefix'] + season + constants.urls[league]['urlSuffix']

    filename = baseDirname + league + '/' + season + '/' + 'playerStats.html'

    try:
        urlgrabber.urlgrab(url, filename, retries=5)
    except Exception, e:
        print "Exception occurred!", e
        print "URL: ", url

        # wait a minute, then retry once
        time.sleep(60)
        urlgrabber.urlgrab(url, filename, retries=5)
Example #5
   def run(self,force=False):
       """
       Download bootloader content for all of the latest bootloaders, since the user
       has chosen not to supply their own.  You may ask "why not get this from yum?",
       but Fedora has no IA64 repo, for instance, we also want this to work on Debian,
       and we do not want folks to have to install a cross compiler.  Those who dislike
       this approach can still source their cross-arch bootloader content manually.
       """

       content_server = "http://mdehaan.fedorapeople.org/loaders"
       dest = "/var/lib/cobbler/loaders"

       files = (
          ( "%s/README" % content_server, "%s/README" % dest ),
          ( "%s/COPYING.elilo" % content_server, "%s/COPYING.elilo" % dest ),
          ( "%s/COPYING.yaboot" % content_server, "%s/COPYING.yaboot" % dest),
          ( "%s/COPYING.syslinux" % content_server, "%s/COPYING.syslinux" % dest),
          ( "%s/elilo-3.8-ia64.efi" % content_server, "%s/elilo-ia64.efi" % dest ),
          ( "%s/yaboot-1.3.14-12" % content_server, "%s/yaboot" % dest),
          ( "%s/pxelinux.0-3.61" % content_server, "%s/pxelinux.0" % dest),
          ( "%s/menu.c32-3.61" % content_server, "%s/menu.c32" % dest),
       )

       self.logger.info("downloading content required to netboot all arches")
       for f in files:
          src = f[0]
          dst = f[1]
          if os.path.exists(dst) and not force:
             self.logger.info("path %s already exists, not overwriting existing content, use --force if you wish to update" % dst)
             continue
          self.logger.info("downloading %s to %s" % (src,dst))
          urlgrabber.urlgrab(src,dst)

       return True
Example #6
    def srpm_from_ticket(self):
        '''Retrieve the latest srpmURL from the bugzilla URL.
        '''
        try:
            bugzillaURL = self.checklist.properties['ticketURL'].value
        except KeyError:
            # No ticket URL was given, set nothing
            return

        if not bugzillaURL:
            # No ticket URL was given, set nothing
            return

        data = urlgrabber.urlread(bugzillaURL)
        srpmList = re.compile('"((ht|f)tp(s)?://.*?\.src\.rpm)"', re.IGNORECASE).findall(data)
        if srpmList == []:
            # No SRPM was found.  Just decide not to set anything.
            return
        # Set the srpm to the last SRPM listed on the page
        srpmURL = srpmList[-1][0]
        if not srpmURL:
            # No srpm found.  Just decide not to set anything.
            return
        # Download the srpm to the temporary directory.
        urlgrabber.urlgrab(srpmURL, self.tmpDir)
        # Fill the SRPMfile properties with the srpm in the temp directory
        self.checklist.properties['SRPMfile'].value = (
                self.tmpDir + os.path.basename(srpmURL))
Example #7
    def __download_prop_file(self):
        """ download prop file and validate """
        # retry the prop file download up to 3 times
        for _ in range(3):
            try:
                sotimeout = float(pylons.config['download_thread_sotimeout'])
                proxies = json.loads(pylons.config['urlgrabber_proxies'])
                urlgrabber.urlgrab(self.__uriDict['propUri'],
                                   self.__uriDict['propPath'],
                                   keepalive=0,
                                   timeout=sotimeout,
                                   proxies=proxies)
                break
            except Exception:
                randsleep = randint(30, 60)
                time.sleep(randsleep)

        if (not os.path.exists(self.__uriDict['propPath'])):
            raise AgentException(
                Errors.DC_MISSING_PROP_FILE,
                'Prop file (%s) does not exist' % (self.__uriDict['propPath']))

        if not PackageUtil.validateProp(self.__uriDict['propPath']):
            raise AgentException(
                Errors.DC_MISSING_PROP_FILE,
                'Prop file (%s) failed validation' %
                (self.__uriDict['propPath']))
Example #8
    def _batch_download(self, uris, local_path=None, throttle=0):
        """Downloads packages from the specified uris. This is a W.I.P!!!

        Args:
            uris (list of strings) - Uris of the packages to download.
            local_path (string) - Full path where the package is to be saved.
                Do not include a file name.
            throttle (int) - Number of kilobytes to throttle the bandwidth by.
                If throttle == 0, throttling is disabled.

        Returns:
            True if package downloaded successfully. False otherwise.
        """

        success = False

        if throttle != 0:
            throttle *= 1024

        for uri in uris:
            try:
                if local_path:
                    # Derive the file name from the uri, dropping any query string
                    name = uri.split('/')[-1]
                    if '?' in name:
                        name = name.split('?')[0]

                    path = os.path.join(local_path, name)
                    urlgrab(uri, filename=path, throttle=throttle)

                else:
                    urlgrab(uri, throttle=throttle)

                success = True
            except Exception as e:
                logger.exception(e)
                success = False

        return success
Example #9
 def __download_prop_file(self):
     """ download prop file and validate """
      # retry the prop file download up to 3 times
     for _ in range(3):
         try:
             sotimeout = float(pylons.config['download_thread_sotimeout'])
             proxies = json.loads(pylons.config['urlgrabber_proxies'])
             urlgrabber.urlgrab(
                         self.__uriDict['propUri'], 
                         self.__uriDict['propPath'], 
                         keepalive = 0, 
                         timeout = sotimeout,
                         proxies = proxies)
             break
         except Exception:
             randsleep = randint(30, 60)                
             time.sleep(randsleep)
     
     if (not os.path.exists(self.__uriDict['propPath'])):
         raise AgentException(Errors.DC_MISSING_PROP_FILE,
                         'Prop file (%s) does not exist' % (self.__uriDict['propPath']))
     
     if not PackageUtil.validateProp(self.__uriDict['propPath']):
         raise AgentException(Errors.DC_MISSING_PROP_FILE,
                         'Prop file (%s) failed validation' % (self.__uriDict['propPath']))
Example #10
def download_file(uri, dl_path, throttle):
    if uri.startswith('https://api.github.com/'):
        # TODO: handle 200 and 302 response
        headers = (("Accept", "application/octet-stream"),)
        urlgrab(uri, filename=dl_path, throttle=throttle, http_headers=headers)

    else:
        urlgrab(uri, filename=dl_path, throttle=throttle)
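A hedged usage sketch for download_file above; the GitHub release-asset URL and the local path are placeholders, and throttle=0 leaves the bandwidth unlimited.

download_file(
    'https://api.github.com/repos/someorg/somerepo/releases/assets/12345',  # placeholder asset URL
    '/tmp/asset.tar.gz',
    throttle=0,  # 0 disables throttling
)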
Example #11
 def _download_file(self, filename):
     url = "%s%s" % (self.EPF_FULL_URL %
                     (self.username, self.password), filename)
     urlgrab(url,
             "%s/%s" % (self.target_dir, filename),
             progress_obj=text_progress_meter(),
             reget="simple",
             retry=0)
Example #12
    def downloadPackage(self):

        # download the package
        urlgrabber.urlgrab(self.testPkgUri, self.localPkgName)
        urlgrabber.urlgrab(self.testPkgUri + '.prop', self.localPkgName + '.prop')

        LOG.debug('localpackagename = %s', self.localPkgName)
        assert os.path.exists(self.localPkgName + '.prop')
        assert os.path.exists(self.localPkgName)
Example #13
    def downloadPackage(self):

        # download the package
        urlgrabber.urlgrab(self.testPkgUri, self.localPkgName)
        urlgrabber.urlgrab(self.testPkgUri + '.prop',
                           self.localPkgName + '.prop')

        LOG.debug('localpackagename = %s', self.localPkgName)
        assert os.path.exists(self.localPkgName + '.prop')
        assert os.path.exists(self.localPkgName)
Example #14
def get_neg_images(url, neg_image_folder):
    images_urls = urllib.urlopen(url).read().decode('utf8')
    pic_num = 1
    for iurl in images_urls.split('\n'):
        try:
            print (iurl)
            urlgrab(iurl, neg_image_folder + "/" + str(pic_num) + ".jpg", timeout = 20)
            img = cv2.imread(neg_image_folder + "/" + str(pic_num) + ".jpg", cv2.IMREAD_GRAYSCALE)
            resize_img = cv2.resize(img, (100, 100))
            cv2.imwrite(neg_image_folder + "/" + str(pic_num) + ".jpg", resize_img)
            pic_num += 1
        except Exception as e:
            print str(e)
Example #15
def page_download(page_url, folder):
    page = urllib2.urlopen(page_url)
    soup = BeautifulSoup(page)
    print len(soup.find_all("a", { "class" : "next" }))
    for src in soup.find_all('img'):
        if src.get('src').endswith(sfx):
            tgt_url = str(src.get('src').replace('small', 'big'))
            print "saving : " + tgt_url 
            tgt_name = os.path.basename(tgt_url)
            try:
                urlgrabber.urlgrab(tgt_url, "./" + folder + "/" + tgt_name, progress_obj=urlgrabber.progress.TextMeter())
            except urlgrabber.grabber.URLGrabError as detail:
                print "Error occurred: " + str(detail)
Example #16
def proses():
	# first link
	link = sys.argv[1]
	buka = urllib2.urlopen(link)
	cari = re.compile('a href="(.*)"   class="dbtn"')
	dapat = re.findall(cari,buka.read())

	# download link
	baru = urllib2.urlopen(dapat[0])
	lagi = re.compile('var flvLink = \'(.*)\'')
	final = re.findall(lagi,baru.read())

	prog = urlgrabber.progress.text_progress_meter()
	urlgrabber.urlgrab(final[0],sys.argv[2],progress_obj=prog)
Example #17
def fetch_image_files(layer, opts):
    if opts.layer:
        path = str(opts.layer)
        if not opts.test and not os.path.isdir(path):
            os.makedirs(path)
    else:
        path = "."
    for image in layer["images"]:
        filetype = image["url"].split(".")[-1]
        target = os.path.join(path, image["hash"] + "." + filetype)
        if opts.test:
            print >>sys.stderr, image["url"], "->", target
        else:
            meter = urlgrabber.progress.text_progress_meter()
            urlgrabber.urlgrab(image["url"], target, progress_obj=meter)
Example #18
def fetch_image_files(layer, opts):
    if opts.layer:
        path = str(opts.layer)
        if not opts.test and not os.path.isdir(path):
            os.makedirs(path)
    else:
        path = "."
    for image in layer["images"]:
        filetype = image["url"].split(".")[-1]
        target = os.path.join(path, image["hash"] + "." + filetype)
        if opts.test:
            print >> sys.stderr, image["url"], "->", target
        else:
            meter = urlgrabber.progress.text_progress_meter()
            urlgrabber.urlgrab(image["url"], target, progress_obj=meter)
Example #19
def read_kickstart(path):
    """Parse a kickstart file and return a KickstartParser instance.

    This is a simple utility function which takes a path to a kickstart file,
    parses it and returns a pykickstart KickstartParser instance which can
    be then passed to an ImageCreator constructor.

    If an error occurs, a CreatorError exception is thrown.

    """
    version = ksversion.makeVersion()
    ks = ksparser.KickstartParser(version)
    try:
        ksfile = urlgrabber.urlgrab(path)
        ks.readKickstart(ksfile)
    # Fallback to e.args[0] is a workaround for bugs in urlgrabber and pykickstart.
    except IOError as e:
        raise errors.KickstartError("Failed to read kickstart file "
                                    "'%s' : %s" %
                                    (path, e.strerror or e.args[0]))
    except kserrors.KickstartError as e:
        raise errors.KickstartError("Failed to parse kickstart file "
                                    "'%s' : %s" % (path, e))
    return ks
Example #20
 def fetch_jetty(self):
     """Download the requested version of Jetty"""
     if path.exists(self.home):
         return
     url = self.node.config.get('jetty','REPO') + self.version + "/jetty-distribution-" + self.version + ".tar.gz"
     if not path.exists(self.cachedir):
         os.makedirs(self.cachedir)
     f = tempfile.mktemp(prefix='jetty-' + self.version + '-', suffix='.tar.gz')
     try:
         print("Downloading Jetty from " + url)
         meter = urlgrabber.progress.TextMeter()
         urlgrabber.urlgrab(url, filename=f, progress_obj=meter)
         subprocess.check_call(["tar", "-x", "-C", self.cachedir, "-f", f])
     finally:
         os.remove(f)
     os.rename(path.join(self.cachedir, 'jetty-distribution-' + self.version), self.home)
Example #21
 def updateLocalDb():
     try:
         if urlgrabber.urlgrab(self.remote_db, self.local_db) == self.local_db:
             updateLocalSum()
             return True
     except urlgrabber.grabber.URLGrabError:
         return False
Example #22
 def __init__(self):
     data = StringIO.StringIO(urlgrabber.urlread("http://itunes.com/version"))
     stream = gzip.GzipFile(fileobj=data)
     data = stream.read()
     updates = plistlib.readPlistFromString(data)
     devs = self.findPods()
     for (dev, name, family, firmware) in devs:
         if not family:
             family, firmware = self.getIPodData(dev)
         print "Found %s with family %s and firmware %s" % (name, family, firmware)
         if updates["iPodSoftwareVersions"].has_key(unicode(family)):
             uri = updates["iPodSoftwareVersions"][unicode(family)]["FirmwareURL"]
             print "Latest firmware: %s" % uri
             print "Fetching firmware..."
             path = urlgrabber.urlgrab(
                 uri, progress_obj=urlgrabber.progress.text_progress_meter(), reget="check_timestamp"
             )
             print "Extracting firmware..."
             zf = zipfile.ZipFile(path)
             for name in zf.namelist():
                 if name[:8] == "Firmware":
                     print "Firmware found."
                     outfile = open("Firmware", "wb")
                     outfile.write(zf.read(name))
                     outfile.close()
                     infile = open("Firmware", "rb")
                     outfile = open(dev, "wb")
                     # FIXME: do the following in pure python?
                     print "Making backup..."
                     commands.getoutput("dd if=%s of=Backup" % dev)
                     print "Uploading firmware..."
                     commands.getoutput("dd if=Firmware of=%s" % dev)
         print "Done."
Example #23
 def fetchRemoteFile(self, archive_file):
     try:
         urlgrab(self.url.get_uri(),
                 archive_file,
                 progress_obj=UIHandler(self.progress),
                 http_headers=self._get_http_headers(),
                 ftp_headers=self._get_ftp_headers(),
                 proxies=self._get_proxies(),
                 throttle=self._get_bandwith_limit(),
                 reget=self._test_range_support(archive_file),
                 copy_local=1,
                 user_agent='PISI Fetcher/' + pisi.__version__)
     except grabber.URLGrabError, e:
         raise FetchError(
             _('Could not fetch destination file "%s": %s') %
             (self.url.get_uri(), e))
Example #24
def grab(url, filename, timeout=120, retry=5, proxy=None, ftpmode=False):
    print "Grabbing", url
    def grab_fail_callback(data):
        # Only print debug here when non fatal retries, debug in other cases
        # is already printed
        if (data.exception.errno in retrycodes) and (data.tries != data.retry):
            print "grabbing retry %d/%d, exception %s"%(
                data.tries, data.retry, data.exception)
    try:
        retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
        if 12 not in retrycodes:
            retrycodes.append(12)
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        downloaded_file = urlgrabber.urlgrab(
            url, filename, timeout=timeout, retry=retry, retrycodes=retrycodes,
            progress_obj=SimpleProgress(), failure_callback=grab_fail_callback,
            copy_local=True, proxies=proxy, ftp_disable_epsv=ftpmode)
        if not downloaded_file:
            return False
    except urlgrabber.grabber.URLGrabError as e:
        warn('URLGrabError %i: %s' % (e.errno, e.strerror))
        if os.path.exists(filename):
            os.unlink(filename)
        return False
    return True
Example #25
def grab(url, filename, timeout=120, retry=5, proxy=None, ftpmode=False):
    print "Grabbing", url
    def grab_fail_callback(data):
        # Only print debug here when non fatal retries, debug in other cases
        # is already printed
        if (data.exception.errno in retrycodes) and (data.tries != data.retry):
            print "grabbing retry %d/%d, exception %s"%(
                data.tries, data.retry, data.exception)
    try:
        retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
        if 12 not in retrycodes:
            retrycodes.append(12)
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
        downloaded_file = urlgrabber.urlgrab(
            url, filename, timeout=timeout, retry=retry, retrycodes=retrycodes,
            progress_obj=SimpleProgress(), failure_callback=grab_fail_callback,
            copy_local=True, proxies=proxy, ftp_disable_epsv=ftpmode)
        if not downloaded_file:
            return False
    except urlgrabber.grabber.URLGrabError as e:
        warn('URLGrabError %i: %s' % (e.errno, e.strerror))
        if os.path.exists(filename):
            os.unlink(filename)
        return False
    return True
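A hedged usage sketch for the grab() helper above; the URL, destination path, and proxy mapping are placeholders. The helper creates the destination directory if needed, retries on the configured error codes, and removes the partial file and returns False if the download ultimately fails.

ok = grab(
    'http://example.com/repo/Packages/foo-1.0.rpm',   # placeholder URL
    '/tmp/mirror/Packages/foo-1.0.rpm',               # destination path; parent dirs are created
    timeout=60,
    retry=3,
    proxy={'http': 'http://proxy.example.com:3128'},  # placeholder proxy mapping
)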
Example #26
def getRemoteURL(url):
    logger.info('downloading %s' % url)
    start = time.time()
    try: 
        fileName = urlgrabber.urlgrab(url, config.localOSMPath)
        fileSize = os.path.getsize(fileName)
    except Exception, e:
        logger.warning('urlgrabber: %s' % e.strerror)
        return(None)
Example #27
def archive_downloader(i):
    list_name = i[0]
    year = i[1]
    month = i[2]
    if not list_name or not year or not month:
        return
    basename = "{0}-{1}.txt.gz".format(year, month)
    filename = "http://lists.fedoraproject.org/pipermail/{0}/{1}".format(list_name, basename)
    try:
        urlgrabber.urlgrab(filename)
        pos = str(months.index(month) + 1)
        if len(pos) == 1:
            pos = "0{0}".format(pos)
        newname = "{0}-{1}-{2}-{3}.txt".format(list_name, year, pos, month)
        with open(newname, "w") as f:
            f.write(gzip.open(basename).read())
        print "== {0} downloaded ==".format(filename)
    except urlgrabber.grabber.URLGrabError:
        pass
Example #28
def fetch_image_files(client, bbox, opts):
    # if opts.layer:
    #    path = str(opts.layer)
    #    if not opts.test and not os.path.isdir(path):
    #        os.makedirs(path)
    # else:
    files = []
    args = {"archive":"true"} if opts.source else {}
    for image in client.images_by_bbox(bbox, **args):
        target = image.path.split("/")[-1]
        if opts.dest:
            meter = urlgrabber.progress.text_progress_meter()
            target = os.path.join(opts.dest, target)
            print >>sys.stderr, image.path, "->", target
            urlgrabber.urlgrab(str(image.path), target, progress_obj=meter)
        else:
            print >>sys.stderr, image.path, "->", target
        files.append(target)
    return files
Example #29
def make_floppy(autoinst):

    (fd, floppy_path) = tempfile.mkstemp(suffix='.floppy',
                                         prefix='tmp',
                                         dir="/tmp")
    print("- creating floppy image at %s" % floppy_path)

    # create the floppy image file
    cmd = "dd if=/dev/zero of=%s bs=1440 count=1024" % floppy_path
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("dd failed")

    # vfatify
    cmd = "mkdosfs %s" % floppy_path
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("mkdosfs failed")

    # mount the floppy
    mount_path = tempfile.mkdtemp(suffix=".mnt", prefix='tmp', dir="/tmp")
    cmd = "mount -o loop -t vfat %s %s" % (floppy_path, mount_path)
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("mount failed")

    # download the autoinst file onto the mounted floppy
    print("- downloading %s" % autoinst)
    save_file = os.path.join(mount_path, "unattended.txt")
    urlgrabber.urlgrab(autoinst, filename=save_file)

    # umount
    cmd = "umount %s" % mount_path
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("umount failed")

    # return the path to the completed disk image to pass to virt-install
    return floppy_path
Example #30
def make_floppy(kickstart):

    (fd, floppy_path) = tempfile.mkstemp(
        suffix='.floppy', prefix='tmp', dir="/tmp")
    print("- creating floppy image at %s" % floppy_path)

    # create the floppy image file
    cmd = "dd if=/dev/zero of=%s bs=1440 count=1024" % floppy_path
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("dd failed")

    # vfatify
    cmd = "mkdosfs %s" % floppy_path
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("mkdosfs failed")

    # mount the floppy
    mount_path = tempfile.mkdtemp(suffix=".mnt", prefix='tmp', dir="/tmp")
    cmd = "mount -o loop -t vfat %s %s" % (floppy_path, mount_path)
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("mount failed")

    # download the kickstart file onto the mounted floppy
    print("- downloading %s" % kickstart)
    save_file = os.path.join(mount_path, "unattended.txt")
    urlgrabber.urlgrab(kickstart, filename=save_file)

    # umount
    cmd = "umount %s" % mount_path
    print("- %s" % cmd)
    rc = os.system(cmd)
    if not rc == 0:
        raise InfoException("umount failed")

    # return the path to the completed disk image to pass to virt-install
    return floppy_path
Example #31
    def fetch(self):
        """Return value: Fetched file's full path.."""

        # import urlgrabber module
        try:
            import urlgrabber
        except ImportError:
            raise FetchError(
                _('Urlgrabber needs to be installed to run this command'))

        if not self.url.filename():
            raise FetchError(_('Filename error'))

        if not os.access(self.destdir, os.W_OK):
            raise FetchError(
                _('Access denied to write to destination directory: "%s"') %
                (self.destdir))

        if os.path.exists(self.archive_file) and not os.access(
                self.archive_file, os.W_OK):
            raise FetchError(
                _('Access denied to destination file: "%s"') %
                (self.archive_file))

        try:
            urlgrabber.urlgrab(
                self.url.get_uri(),
                self.partial_file,
                progress_obj=UIHandler(self.progress),
                http_headers=self._get_http_headers(),
                ftp_headers=self._get_ftp_headers(),
                proxies=self._get_proxies(),
                throttle=self._get_bandwith_limit(),
                reget=self._test_range_support(),
                copy_local=1,
                retry=3,  # retry 3 times
                timeout=120,  # Reduce from default of 5 minutes to 2 minutes
                user_agent='eopkg Fetcher/' + pisi.__version__)
        except urlgrabber.grabber.URLGrabError, e:
            raise FetchError(
                _('Could not fetch destination file "%s": %s') %
                (self.url.get_uri(), e))
Example #32
def download_rpms(pkg, outdir):
    """
    Download the RPM for the given package and save it under outdir.

    :param pkg: A dict containing basic RPM information other than the url
    :param outdir: Where to save the RPM[s]
    """
    url = RS.call("packages.getPackageUrl", [pkg["id"]], ["--no-cache"])[0]
    logging.info("RPM URL: " + url)

    return urlgrabber.urlgrab(url, os.path.join(outdir, os.path.basename(url)))
Example #33
    def _process_patch(self, patch):
        url = urlparse.urlparse(patch.file)
        src_dir = self.build_dir('SOURCES')

        if not url.scheme:
            filename = os.path.basename(url.path)
            path = os.path.join(self.base_path, filename)

            dest = os.path.join(src_dir, filename)
            if not os.path.exists(dest) or not os.path.samefile(path, dest):
                shutil.copyfile(path, dest)
        else:
            filename = url.path.rsplit('/', 1)[-1]
            dest = os.path.join(src_dir, filename)
            urlgrabber.urlgrab(patch.file, dest)

        return Patch(filename, patch.options)
Example #34
def fetchHTMLFiles(clubDict, league, season='15'):
    # create HTML directory
    dir = os.path.dirname(baseDirname)
    if not os.path.exists(dir):
        os.makedirs(dir)

    # create league directory inside HTML directory
    dir = os.path.dirname(baseDirname + league + '/')
    if not os.path.exists(dir):
        os.makedirs(dir)

    # create season directory inside league directory
    dir = os.path.dirname(baseDirname + league + '/' + season + '/')
    if not os.path.exists(dir):
        os.makedirs(dir)

    for clubName, clubId in clubDict.iteritems():
        print "[File Getter]  Getting HTML for club: %s\tleague: %s\tseason: 20%s" % \
              (clubName, league, season)

        url = baseURL + ` clubId `
        filename = baseDirname + league + '/' + season + '/' + clubName + '_' + ` clubId `

        if (season != '15'):
            url = baseURL + ` clubId ` + '?saison_id=20' + season

    # because of the different season schedule, seasons are shifted by one in MLS...
        if (league == 'MajorLeagueSoccer'):
            url = baseURL + ` clubId ` + '?saison_id=' + ` (
                int('20' + season) - 1) `

        try:
            urlgrabber.urlgrab(url, filename, retries=5)
        except Exception, e:
            print "Exception occurred!", e
            print "URL: ", url

            # wait a minute, then retry once
            time.sleep(60)
            urlgrabber.urlgrab(url, filename, retries=5)
Example #35
def get_images(active=True, outdir='player_images', outlist='player_names.csv'):
    import bs4, urlgrabber, httplib

    if active:
        list_url = 'http://stats.nba.com/frags/stats-site-page-players-directory-active.html'
    else:
        list_url = 'http://stats.nba.com/players.html'

    # prepare player list
    flist = open(outlist, 'w')
    flist.write('# name\n')

    # fetch and parse the NBA player list
    player_page = urlgrabber.urlread(list_url)
    soup = bs4.BeautifulSoup(player_page)

    # loop through the player list
    for p in soup('a', 'playerlink'):
        phref = str(p['href'])

        ## exclude "historical" players
        #if (len(phref.split('HISTADD')) == 1):

        # verify that player pages exist
        pname = phref.split('/')[-1]
        conn = httplib.HTTPConnection('i.cdn.turner.com')
        conn.request('HEAD', '/nba/nba/.element/img/2.0/sect/statscube/players/large/'+pname+'.png')
        if (conn.getresponse().status != 404):

            # download and save player images
            img_link = 'http://i.cdn.turner.com/nba/nba/.element/img/2.0/sect/statscube/players/large/'+pname+'.png'
            urlgrabber.urlgrab(img_link, filename=outdir+'/'+pname+'.png')

            # write player names to list
            flist.write(pname+'\n')

    # close name list
    flist.close()

    return
Example #36
def download(url, progress=False):
    """ Download the document pointed to by url to cwd
    """
    filename = get_filename(url)

    if os.path.exists(filename):
        info(filename + " already exists in cwd. Not downloading. ")
    else:
        debug("Downloading " + url)

        if progress:
            import urlgrabber
            from urlgrabber.progress import text_progress_meter

            urlgrabber.urlgrab(url=url,
                               filename=filename,
                               progress_obj=text_progress_meter())
        else:
            urllib.urlretrieve(url=url, filename=filename)

        debug("Finished Downloading " + filename)

    return filename
Example #37
    def get_mediaproducts(self):
        """
        Return path to media.1/products file if available

        :returns: str
        """
        media_products_path = os.path.join(self._get_repodata_path(), 'media.1/products')
        try:
            (s,b,p,q,f,o) = urlparse(self.url)
            if p[-1] != '/':
                p = p + '/'
            p = p + 'media.1/products'
        except (ValueError, IndexError, KeyError) as e:
            return None
        url = urlunparse((s,b,p,q,f,o))
        try:
            urlgrabber_opts = {}
            self.set_download_parameters(urlgrabber_opts, url, media_products_path)
            urlgrabber.urlgrab(url, media_products_path, **urlgrabber_opts)
        except Exception as exc:
            # no 'media.1/products' file found
            return None
        return media_products_path
Example #38
def read_kickstart(path):
    """Parse a kickstart file and return a KickstartParser instance.

    This is a simple utility function which takes a path to a kickstart file,
    parses it and returns a pykickstart KickstartParser instance which can
    be then passed to an ImageCreator constructor.

    If an error occurs, a CreatorError exception is thrown.

    """
    version = ksversion.makeVersion()
    ks = ksparser.KickstartParser(version)
    try:
        ksfile = urlgrabber.urlgrab(path)
        ks.readKickstart(ksfile)
    # Fallback to e.args[0] is a workaround for bugs in urlgrabber and pykickstart.
    except IOError, e:
        raise errors.KickstartError("Failed to read kickstart file " "'%s' : %s" % (path, e.strerror or e.args[0]))
Example #39
File: util.py Project: tyll/bodhi
def sanity_check_repodata(myurl):
    """
    Sanity check the repodata for a given repository.
    Initial implementation by Seth Vidal.
    """
    myurl = str(myurl)
    tempdir = tempfile.mkdtemp()
    errorstrings = []
    if myurl[-1] != '/':
        myurl += '/'
    baseurl = myurl
    if not myurl.endswith('repodata/'):
        myurl += 'repodata/'
    else:
        baseurl = baseurl.replace('repodata/', '/')

    rf = myurl + 'repomd.xml'
    try:
        rm = urlgrabber.urlopen(rf)
        repomd = repoMDObject.RepoMD('foo', rm)
        for t in repomd.fileTypes():
            data = repomd.getData(t)
            base, href = data.location
            if base:
                loc = base + '/' + href
            else:
                loc = baseurl + href

            destfn = tempdir + '/' + os.path.basename(href)
            dest = urlgrabber.urlgrab(loc, destfn)
            ctype, known_csum = data.checksum
            csum = checksum(ctype, dest)
            if csum != known_csum:
                errorstrings.append("checksum: %s" % t)

            if href.find('xml') != -1:
                decompressed = decompress(dest)
                retcode = subprocess.call(
                    ['/usr/bin/xmllint', '--noout', decompressed])
                if retcode != 0:
                    errorstrings.append("failed xml read: %s" % t)

    except urlgrabber.grabber.URLGrabError, e:
        errorstrings.append('Error accessing repository %s' % e)
Example #40
def sanity_check_repodata(myurl):
    """
    Sanity check the repodata for a given repository.
    Initial implementation by Seth Vidal.
    """
    myurl = str(myurl)
    tempdir = tempfile.mkdtemp()
    errorstrings = []
    if myurl[-1] != '/':
        myurl += '/'
    baseurl = myurl
    if not myurl.endswith('repodata/'):
        myurl += 'repodata/'
    else:
        baseurl = baseurl.replace('repodata/', '/')

    rf = myurl + 'repomd.xml'
    try:
        rm = urlgrabber.urlopen(rf)
        repomd = repoMDObject.RepoMD('foo', rm)
        for t in repomd.fileTypes():
            data = repomd.getData(t)
            base, href = data.location
            if base:
                loc = base + '/' + href
            else:
                loc = baseurl + href

            destfn = tempdir + '/' + os.path.basename(href)
            dest = urlgrabber.urlgrab(loc, destfn)
            ctype, known_csum = data.checksum
            csum = checksum(ctype, dest)
            if csum != known_csum:
                errorstrings.append("checksum: %s" % t)

            if href.find('xml') != -1:
                decompressed = decompress(dest)
                retcode = subprocess.call(['/usr/bin/xmllint', '--noout', decompressed])
                if retcode != 0:
                    errorstrings.append("failed xml read: %s" % t)

    except urlgrabber.grabber.URLGrabError, e:
        errorstrings.append('Error accessing repository %s' % e)
Example #41
 def get_file(self, path, local_base=None):
     try:
         try:
             temp_file = ""
             if local_base is not None:
                 target_file = os.path.join(local_base, path)
                 target_dir = os.path.dirname(target_file)
                 if not os.path.exists(target_dir):
                     os.makedirs(target_dir, int('0755', 8))
                 temp_file = target_file + '..download'
                 if os.path.exists(temp_file):
                     os.unlink(temp_file)
                 downloaded = urlgrabber.urlgrab(path, temp_file)
                 os.rename(downloaded, target_file)
                 return target_file
             else:
                 return urlgrabber.urlread(path)
         except urlgrabber.URLGrabError:
             return
     finally:
         if os.path.exists(temp_file):
             os.unlink(temp_file)
Example #42
    def get_file(self, path, local_base=None):
        try:
            try:
                temp_file = ""
                try:
                    if not urlparse(path).scheme:
                        (s,b,p,q,f,o) = urlparse(self.url)
                        if p[-1] != '/':
                            p = p + '/'
                        p = p + path
                        path = urlunparse((s,b,p,q,f,o))
                except (ValueError, IndexError, KeyError) as e:
                    return None

                if local_base is not None:
                    target_file = os.path.join(local_base, path)
                    target_dir = os.path.dirname(target_file)
                    if not os.path.exists(target_dir):
                        os.makedirs(target_dir, int('0755', 8))
                    temp_file = target_file + '..download'
                    if os.path.exists(temp_file):
                        os.unlink(temp_file)
                    urlgrabber_opts = {}
                    self.set_download_parameters(urlgrabber_opts, path, temp_file)
                    downloaded = urlgrabber.urlgrab(path, temp_file, **urlgrabber_opts)
                    os.rename(downloaded, target_file)
                    return target_file
                else:
                    urlgrabber_opts = {}
                    self.set_download_parameters(urlgrabber_opts, path)
                    return urlgrabber.urlread(path, **urlgrabber_opts)
            except urlgrabber.grabber.URLGrabError:
                return
        finally:
            if os.path.exists(temp_file):
                os.unlink(temp_file)
Example #43
                os.makedirs(os.path.dirname(destfile))
            except OSError:
                pass

        if self.callbacks.has_key(name):
            self.callbacks[name]()

        opos = self.pos[name]
        while True:
            sourceurl = self.__createSourceURI(uri, name)
            print sourceurl, destfile, self.headers[name]
            try:
                if force:
                    f = urlgrab(sourceurl,
                                destfile,
                                timeout=30.0,
                                copy_local=copy_local,
                                http_headers=self.headers[name],
                                ssl_ca_cert='/usr/share/rhn/RHNS-CA-CERT')
                else:
                    f = urlgrab(sourceurl,
                                destfile,
                                timeout=30.0,
                                reget='check_timestamp',
                                copy_local=copy_local,
                                http_headers=self.headers[name],
                                ssl_ca_cert='/usr/share/rhn/RHNS-CA-CERT')
            except Exception, e:
                # urlgrab fails with invalid range for already completely
                # transferred files, pretty strange to me to be honest... :)
                if e[0] == 9:
                    f = destfile
Example #44
def main(argv):
  """Download utility to simplify the download of USPTO patent data

  USPTO patent applications are currently hosted by Google. In most cases, you
  will be interested in all patents from a specific year or from a relevant
  period of time. Since downloading each compressed file separately is
  cumbersome, this download utility might help you.

  This tool offers three basic operations:
  (1) -d  Downloads the relevant files one at a time; might be slow.
  (2) -f  Lists all available hyperlinks pointing to zip files and stores them
      in year-based text files. This is suitable for anyone who wants to use
      their own download utility (e.g. to parallelise the downloads).
  (3) -p  Prints all links found to zip files to the standard out.


  Usage: python uspto-patent-downloader.py [options]

  Options:
    -d .., --download     downloads each zip file found using 'url'
    -f .., --files        writes all relevant links found into files; one file for each year
    -h, --help            shows this help
    -l .., --loglevel ... determines the log level (INFO, DEBUG, ..)
    -o .., --out ...      specifies the output directory
                          (default: './uspto-files')
    -p, --print           prints all relevant links found to the standard out
                          (this option is selected by default if '-f' is missing)
    -u .., --url ...      url to the USPTO patent applications bibliography hosted by Google
                          (default: http://www.google.com/googlebooks/uspto-patents-applications-biblio.html)
    -y .., --years ...    comma separated list of years (e.g. '2002,2004') to consider for download
                          (default: all years are considered from 2001 to now)

  Examples:
    uspto-patent-downloader.py -list > links-to-download.txt
    uspto-patent-downloader.py -u http://www.google.com/googlebooks/uspto-patents-applications-biblio.html -f
    uspto-patent-downloader.py -years 2001,2002
    uspto-patent-downloader.py -f -y 2003 -out .
  """

  defaults = {
    'uspto_url': 'http://www.google.com/googlebooks/uspto-patents-applications-biblio.html',
    'html_page': None,
    'requested_years': [],
    'write_to_stdout': False,
    'print_to_file': False,
    'download': False,
    'output_directory': '.'
  }

  validate_input(argv, defaults);

  if not defaults['html_page']:
    defaults['html_page'] = load_url(uspto_url)
  soup = BeautifulSoup(html_page)

  links_per_year = defaultdict(list)
  links = soup.findAll('a', attrs={ 'href': re.compile('zip$') })
  logging.info(' found ' + str(len(links)) + ' links')
  for link in links:
    logging.debug('  . ' + link['href'])
    matched_year = re.search( '/([0-9]{4})/', link['href'])
    if matched_year:
      links_per_year[matched_year.group(1)].append(link['href'])
  filtered_dict = links_per_year
  if requested_years:
    filtered_dict = { year : links_per_year[year] for year in requested_years }
  if write_to_stdout:
    for links in sorted(filtered_dict.itervalues()):
      for link in links:
        print link
  if print_to_file:
    makedirs(output_directory)
    for k,v in filtered_dict.iteritems():
      basename = k + '.txt'
      filename = output_directory + '/' + basename
      if os.path.isfile(filename):
        os.remove(filename)
      with open(filename, 'a') as text_file:
        for link in sorted(v):
          text_file.write(link + '\n')
        logging.debug(' file ' + basename + ' written to disk')
    logging.info(' all files written to disk')
  if download:
    for year, links in filtered_dict.iteritems():
      makedirs(os.path.join(output_directory, year))
      for link in links:
        try:
          filename = os.path.join(output_directory, year, link.split('/')[-1])
          prog = urlgrabber.progress.text_progress_meter()
          urlgrabber.urlgrab(str(link), filename, progress_obj=prog)
        except Exception, e:
          logging.warn(' error while downloading %s: %s' % (link, e))
Example #45
def fileDowload(url, full_destiny):
    try:
        print(time.strftime("%H:%M:%S")+": Start downloading file - "+full_destiny)
        dataj = urlgrab(str(url), str(full_destiny), **opts)
    except Exception:
        print("cannot connect to the server ")
Example #46
 def test_urlgrab(self):
     "module-level urlgrab() function"
     _, outfile = tempfile.mkstemp()
     filename = urlgrabber.urlgrab('http://www.python.org', 
                                 filename=outfile)
     os.unlink(outfile)
Example #47
#!/usr/bin/env python
""" Grabs all the sources for a given version number of elasticsearch """

import urlgrabber
from optparse import OptionParser


parser = OptionParser()
(options,args) = parser.parse_args()
print "Getting Main Source for version %s " % args[0]
urlgrabber.urlgrab('https://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-%s.tar.gz' % args[0])
print "Getting plugin-analysis-icu"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/analysis-icu/elasticsearch-analysis-icu-%s.zip' % args[0])
print "Getting plugin-cloud-aws"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/cloud-aws/elasticsearch-cloud-aws-%s.zip' % args[0])
print "Getting plugin-hadoop"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/hadoop/elasticsearch-hadoop-%s.zip' % args[0])
print "Getting plugin-lang-groovy"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/lang-groovy/elasticsearch-lang-groovy-%s.zip' % args[0])
print "Getting plugin-lang-javascript"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/lang-javascript/elasticsearch-lang-javascript-%s.zip' % args[0])
print "Getting plugin-lang-python"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/lang-python/elasticsearch-lang-python-%s.zip' % args[0])
print "Getting plugin-mapper-attachments"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/mapper-attachments/elasticsearch-mapper-attachments-%s.zip' % args[0])
print "Getting plugin-river-couchdb"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-couchdb/elasticsearch-river-couchdb-%s.zip' % args[0])
print "Getting plugin-river-rabbitmq"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-rabbitmq/elasticsearch-river-rabbitmq-%s.zip' % args[0])
print "Getting plugin-river-twitter"
urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-twitter/elasticsearch-river-twitter-%s.zip' % args[0])
Example #48
# parsed_xml = xmlparse.parse(u2.urlopen(feed_url))

    try:
        all_downloads = [item.findtext('link') for item in xmlparse.parse(u2.urlopen(feed_url)).iterfind('channel/item')]
    except xmlparse.ParseError:
        print "Error: invalid RSS feed. Quitting ..."
        exit(0)
    except URLError as e:
        print str(e)
        exit(0)
    except ValueError as e:
        print str(e)
        exit(0)
    except KeyError as e:
        print str(e)
        exit(0)

    print all_downloads

    # downloading
    for single_download in all_downloads:
        print "Starting: " + single_download
        g = ug.grabber.URLGrabber(reget='simple', retry=2)
        response = ug.urlgrab(single_download)
        print "Completed: " + single_download
Example #49
        base_filename = os.path.basename(filename)
        base_upstream_filename = os.path.basename(files[filename])
        modname = os.path.splitext(base_upstream_filename)[0]

        if not os.path.exists(modname):
            os.mkdir(modname)
            sp = open(os.path.join('..', filename), 'r')
            sp_sum = sha1(sp.read()).hexdigest()
            sp_sum_line = '%s\n' % sp_sum
            record = open('%s/record' % modname, 'w')
            record.writelines(sp_sum_line)
            record.close()

        os.chdir(modname)

        urlgrabber.urlgrab(files[filename])
        sp = open(base_upstream_filename)
        sp_sum = sha1(sp.read()).hexdigest()
        sp_sum_line = '%s\n' % sp_sum

        record = open('record', 'r')
        if sp_sum_line in record:
            os.remove(base_upstream_filename)
            os.chdir('..')
            continue

        retrieved_at = int(time.time())
        new_name = '%s-%s' % (retrieved_at, base_upstream_filename)
        os.rename(base_upstream_filename, new_name)

        print('New %s found: %s' % (base_upstream_filename, new_name))
Example #50
from urlgrabber import urlgrab
url = 'http://i1.letvimg.com/vrs/201204/05/c3671b2ca6be47c6bcdb4d32e24f60ab.jpg'

try:
    filename = urlgrab(url, '/tmp/' + 'image')
    print('download %s ok' % filename)
except Exception as e:
    print('download failed - %s' % e)
Example #51
    def __startDownload(self):
        """ actual download logic """
        try:
            LOG.info("Starting package download for package %s" %
                     self.__uriDict['package'])

            # Check to see if there's an in-progress file. Since PackageMgr
            # guarantees that duplicate threads will not be spawned for the same
            # pkg, assume an existing thread was killed; attempt to clean up the
            # package and move on.
            if (os.path.exists(self.__uriDict['inProgressPackagePath'])):
                LOG.debug(
                    'In progress file (%s) already exists. Will validate and reattempt download if necessary'
                    % self.__uriDict['inProgressPackagePath'])

            if os.path.exists(self.__uriDict['packagePath']):
                if (os.path.exists(self.__uriDict['propPath']) and
                        PackageUtil.validateProp(self.__uriDict['propPath'])
                        and PackageUtil.validatePackage(
                            self.__uriDict['packagePath'],
                            self.__uriDict['propPath'])):
                    msg = 'The package already exists. Will NOT download duplicate package' + self.__uriDict[
                        'packagePath']
                    LOG.info(msg)
                    os.utime(self.__uriDict['packagePath'], None)
                    os.utime(self.__uriDict['propPath'], None)
                    self._updateStatus(progress=100)
                    # NOTE: this is a normal exit not an error!
                    return
                LOG.warning(
                    'The package already exists. However package prop (%s) failed validation. Downloading package.'
                    % self.__uriDict['propPath'])

            # Delete all traces of package before beginning download
            LOG.info('Cleaning up all packages for %s ' %
                     self.__uriDict['packagePath'])
            PackageUtil.cleanUpPackage(self.__uriDict['inProgressPackagePath'],
                                       self.__uriDict['packagePath'],
                                       self.__uriDict['propPath'])

            AgentThread._updateProgress(self, 0)

            if not self.__skipProp:
                # First, download .prop file
                LOG.info(
                    'Starting download of prop file %s - %s' %
                    (self.__uriDict['propUri'], self.__uriDict['propPath']))
                self.__download_prop_file()
                try:
                    self.__prop = loadPropFile(self.__uriDict['propPath'])
                except FileNotFoundError:
                    raise AgentException(
                        Errors.DC_MISSING_PROP_FILE,
                        'Prop file (%s) unable to read or did not parse' %
                        (self.__uriDict['propPath']))
            AgentThread._updateProgress(self, 2)

            self.__setProgressTimeouts()

            if self.__uriDict['scheme'] == 'http':
                # try download 3 times, with random sleep
                for _ in range(3):
                    try:
                        sotimeout = float(
                            pylons.config['download_thread_sotimeout'])
                        proxies = json.loads(
                            pylons.config['urlgrabber_proxies'])
                        urlgrabber.urlgrab(
                            self.__uriDict['uri'],
                            self.__uriDict['inProgressPackagePath'],
                            checkfunc=None if self.__skipProp else
                            (PackageUtil.validateDownload, (), {}),
                            progress_obj=DownloadProgress(self),
                            throttle=float(pylons.config['package_throttle']),
                            bandwidth=int(pylons.config['package_bandwidth']),
                            keepalive=0,
                            timeout=sotimeout,
                            proxies=proxies)
                        break
                    except Exception as exc:
                        msg = 'Download error %s - %s' % (
                            str(exc), traceback.format_exc(3))
                        LOG.warning(msg)
                        randsleep = randint(30, 60)
                        time.sleep(randsleep)

            else:
                # oops! only http and bittorrent supported now
                raise AgentException(
                    Errors.DC_UNSUPPORTED_PROTOCOL,
                    'Only the http protocol is supported at the moment')

            self._checkStop()

            if not self.__skipProp:
                if (not PackageUtil.validatePackage(
                        self.__uriDict['inProgressPackagePath'],
                        self.__uriDict['propPath'])):
                    raise AgentException(
                        Errors.DC_FAILED_VALIDATE, 'Package ' +
                        self.__uriDict['packagePath'] + ' failed validation')
                os.utime(self.__uriDict['propPath'], None)
                utils.rchmod(self.__uriDict['propPath'], "777", 'no')

            LOG.info(
                'Download complete, will now rename and do validation on this file %s'
                % self.__uriDict['packagePath'])
            os.rename(self.__uriDict['inProgressPackagePath'],
                      self.__uriDict['packagePath'])
            os.utime(self.__uriDict['packagePath'], None)
            utils.rchmod(self.__uriDict['packagePath'], "777", 'no')
            LOG.info(
                "Download complete, Validation completed, updating progress to 100"
            )
            self._updateStatus(progress=100)

        except AgentException, exc:
            self._updateStatus(httpStatus=500,
                               progress=0,
                               error=exc.getCode(),
                               errorMsg=exc.getMsg())
            msg = 'Download error %s - %s' % (str(exc),
                                              traceback.format_exc(3))
            LOG.error(msg)
            raise exc
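
The retry loop above pulls its timeout, proxy, throttle and bandwidth values
from the pylons configuration. A minimal stand-alone sketch of the same
retry-with-random-sleep pattern, with hypothetical hard-coded values in place
of those config lookups, might look like this:

import time
from random import randint

import urlgrabber


def grab_with_retries(uri, dest, attempts=3):
    """Try the download a few times, sleeping 30-60 seconds between failures."""
    for attempt in range(attempts):
        try:
            urlgrabber.urlgrab(uri, dest, keepalive=0, timeout=30.0)
            return True
        except Exception as exc:
            print('attempt %d failed: %s' % (attempt + 1, exc))
            time.sleep(randint(30, 60))
    return False
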
    def test_urlgrab(self):
        "module-level urlgrab() function"
        outfile = tempfile.mktemp()
        filename = urlgrabber.urlgrab('http://abat.au.example.com',
                                      filename=outfile)
        os.unlink(outfile)
Example #53
#!/usr/bin/env python
""" Grabs all the sources for a given version number of elasticsearch """

import urlgrabber
from optparse import OptionParser

parser = OptionParser()
(options, args) = parser.parse_args()
print "Getting Main Source for version %s " % args[0]
urlgrabber.urlgrab(
    'https://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-%s.tar.gz'
    % args[0])

#Things we don't want:
#print "Getting plugin-lang-python"
#urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/lang-python/elasticsearch-lang-python-%s.zip' % args[0])
#print "Getting plugin-lang-groovy"
#urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/lang-groovy/elasticsearch-lang-groovy-%s.zip' % args[0])
#print "Getting plugin-river-couchdb"
#urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-couchdb/elasticsearch-river-couchdb-%s.zip' % args[0])
#print "Getting plugin-river-rabbitmq"
#urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-rabbitmq/elasticsearch-river-rabbitmq-%s.zip' % args[0])
#print "Getting plugin-river-twitter"
#urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-twitter/elasticsearch-river-twitter-%s.zip' % args[0])
#print "Getting plugin-river-wikipedia"
#urlgrabber.urlgrab('http://elasticsearch.googlecode.com/svn/plugins/river-wikipedia/elasticsearch-river-wikipedia-%s.zip' % args[0])

print "Getting plugin-lang-javascript"
urlgrabber.urlgrab(
    'http://elasticsearch.googlecode.com/svn/plugins/lang-javascript/elasticsearch-lang-javascript-%s.zip'
    % args[0])
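
The script takes the version number as its only positional argument. Assuming
it is saved as get_es_sources.py (a made-up name), a run might look like:

    python get_es_sources.py 0.19.2

where 0.19.2 stands for whichever elasticsearch release is wanted; the
tarball and the lang-javascript plugin zip end up in the current directory.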
Example #54
    def _get_mirror_list(self, repo, url):
        mirrorlist_path = os.path.join(repo.root, 'mirrorlist.txt')
        returnlist = []
        content = []
        try:
            urlgrabber_opts = {}
            self.set_download_parameters(urlgrabber_opts, url, mirrorlist_path)
            urlgrabber.urlgrab(url, mirrorlist_path, **urlgrabber_opts)
        except Exception as exc:
            # no mirror list found; continue without one
            return returnlist

        def _replace_and_check_url(url_list):
            goodurls = []
            skipped = None
            for url in url_list:
                # obviously bogus entries get ignored; more thorough checks are possible but not worth it here
                if url in ['', None]:
                    continue
                try:
                    # This started throwing ValueErrors, BZ 666826
                    (s, b, p, q, f, o) = urlparse(url)
                    if p[-1] != '/':
                        p = p + '/'
                except (ValueError, IndexError, KeyError) as e:
                    s = 'blah'

                if s not in ['http', 'ftp', 'file', 'https']:
                    skipped = url
                    continue
                else:
                    goodurls.append(urlunparse((s, b, p, q, f, o)))
            return goodurls

        try:
            with open(mirrorlist_path, 'r') as mirrorlist_file:
                content = mirrorlist_file.readlines()
        except Exception as exc:
            self.error_msg("Could not read mirrorlist: {}".format(exc))

        try:
            # Try to read a metalink XML
            for files in etree.parse(mirrorlist_path).getroot():
                file_elem = files.find(METALINK_XML + 'file')
                if file_elem.get('name') == 'repomd.xml':
                    _urls = file_elem.find(METALINK_XML +
                                           'resources').findall(METALINK_XML +
                                                                'url')
                    for _url in _urls:
                        # The mirror urls in the metalink file are for repomd.xml so it
                        # gives a list of mirrors for that one file, but we want the list
                        # of mirror baseurls. Joy of reusing other people's stds. :)
                        if not _url.text.endswith("/repodata/repomd.xml"):
                            continue
                        returnlist.append(
                            _url.text[:-len("/repodata/repomd.xml")])
        except Exception as exc:
            # If no metalink XML, we try to read a mirrorlist
            for line in content:
                if re.match(r'^\s*#.*', line) or re.match(r'^\s*$', line):
                    continue
                mirror = re.sub(r'\n$', '', line)  # no more trailing \n's
                (mirror, count) = re.subn(r'\$ARCH', '$BASEARCH', mirror)
                returnlist.append(mirror)

        returnlist = _replace_and_check_url(returnlist)

        try:
            # Write the final mirrorlist that is going to be passed to Zypper
            with open(mirrorlist_path, 'w') as mirrorlist_file:
                mirrorlist_file.write(os.linesep.join(returnlist))
        except Exception as exc:
            self.error_msg(
                "Could not write the calculated mirrorlist: {}".format(exc))
        return returnlist
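
The inner _replace_and_check_url helper is what keeps only usable mirror
entries. A self-contained sketch of that filtering logic, lifted out of the
class with its own imports (the sample URLs are made up), could look like
this:

try:
    from urllib.parse import urlparse, urlunparse   # Python 3
except ImportError:
    from urlparse import urlparse, urlunparse       # Python 2


def filter_mirrors(url_list):
    """Drop empty or non-http/ftp/file entries and force a trailing slash."""
    goodurls = []
    for url in url_list:
        if url in ['', None]:
            continue
        try:
            (scheme, netloc, path, params, query, frag) = urlparse(url)
            if path[-1] != '/':
                path = path + '/'
        except (ValueError, IndexError, KeyError):
            continue
        if scheme in ['http', 'ftp', 'file', 'https']:
            goodurls.append(urlunparse((scheme, netloc, path, params,
                                        query, frag)))
    return goodurls


print(filter_mirrors(['http://mirror.example.com/repo', '', 'not-a-url']))
# -> ['http://mirror.example.com/repo/']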
Example #55
    def download_file(self, uri, lhash, fsize,
                      local_path=None, throttle=0):
        """Downloads a package from specified uri.

        Args:
            uri (strings): Uri of the file to download.

            local_path (string): Full path where the package is be saved.
                Do not include a file name.

            throttle (int): Number of kilobytes to throttle the bandwidth by.
                If throttle == 0, throttling is disabled.

        Returns:
            True if package downloaded successfully. False otherwise.
        """

        # urlgrab doesn't like unicode.
        uri = str(uri)
        if not lhash:
            lhash = ''

        success = False
        hash_status = 'not verified'
        fsize_match = False
        path = ''

        if throttle != 0:
            throttle *= 1024

        try:

            if local_path and len(uri) > 0:

                name = uri.split('/')[-1]
                if '?' in name:
                    name = name.split('?')[0]

                path = os.path.join(local_path, name)

                urlgrab(uri, filename=path, throttle=throttle)

            elif len(uri) > 0 and not local_path:

                path = urlgrab(uri, throttle=throttle)

        except Exception as e:
            logger.exception(e)

        if os.path.exists(path):
            if len(lhash) > 0:
                hash_match = hash_verifier(orig_hash=lhash, file_path=path)

                if hash_match['pass']:
                    hash_status = 'verified'
                    fsize_match = True
                    success = True

            elif fsize and len(lhash) < 1:
                if os.path.getsize(path) == fsize:
                    hash_status = 'no hash'
                    fsize_match = True
                    success = True

        return (success, hash_status, fsize_match)
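
hash_verifier is not shown in this snippet, so the verification step can only
be sketched. The stand-alone illustration below returns the same
(success, hash_status, fsize_match) tuple; the use of MD5 is an assumption,
not necessarily what hash_verifier actually computes.

import hashlib
import os


def verify_download(path, expected_hash='', expected_size=None):
    """Return (success, hash_status, size_match), mirroring download_file."""
    if not os.path.exists(path):
        return (False, 'not verified', False)
    if expected_hash:
        # compare the computed digest against the expected one
        with open(path, 'rb') as fh:
            digest = hashlib.md5(fh.read()).hexdigest()
        if digest == expected_hash:
            return (True, 'verified', True)
        return (False, 'not verified', False)
    if expected_size is not None and os.path.getsize(path) == expected_size:
        # no hash available, fall back to a plain size check
        return (True, 'no hash', True)
    return (False, 'not verified', False)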