Example #1
    def process_event(self, event):
        url, data = self.build_url_and_data(event)
        headers = {"Content-Type": "application/json"}
        try:
            urlopen(url, data=data, http_headers=headers.items())
            return True
        except IOError:
            return False
Example #2
    def test(self, timeout=3):
        import urlgrabber

        try:
            urlgrabber.urlopen(self.url.get_uri(),
                           http_headers = self._get_http_headers(),
                           ftp_headers  = self._get_ftp_headers(),
                           proxies      = self._get_proxies(),
                           timeout      = timeout,
                           user_agent   = 'PiSi Fetcher/' + pisi.__version__)
        except urlgrabber.grabber.URLGrabError:
            return False

        return True
Example #3
def versionFromFile(f):
    """Given a file or URL, look for a line starting with #version= and
       return the version number.  If no version is found, return DEVEL.
    """
    v = DEVEL

    fh = urlopen(f)

    while True:
        try:
            l = fh.readline()
        except StopIteration:
            break

        # At the end of the file?
        if l == "":
            break

        if l.isspace() or l.strip() == "":
            continue

        if l[:9] == "#version=":
            v = stringToVersion(l[9:].rstrip())
            break

    fh.close()
    return v
Example #4
    def __parse_jeos_images(self):
        log = logging.getLogger('%s.%s' % (__name__, self.__class__.__name__))
        config_urls = self.configuration['jeos_config']
        for url in config_urls:
            filehandle = urlopen(str(url))
            line = filehandle.readline().strip()
            line_number = 1

            while line:
                # Lines that start with '#' are a comment
                if line[0] == "#":
                    pass
                # Lines that are zero length are whitespace
                elif len(line.split()) == 0:
                    pass
                else:
                    image_detail = line.split(":")
                    if len(image_detail) >= 6:
                        self.__add_jeos_image(image_detail)
                    else:
                        log.warning("Failed to parse line %d in JEOS config (%s):\n%s" % (line_number, url, line))

                line = filehandle.readline()
                line_number += 1

            filehandle.close()
Example #5
    def get_remote_hash(self, branch):
        '''Return the git-hash for the most recent commit on the specified
           branch'''
        assert isinstance(branch, str), "branch argument must be a string"

        u = urlparse.urlparse(self.git_url)
        if u.scheme == 'git':
            if u.netloc == 'github.com':
                # Formulate API call
                json_url = "http://%s/api/v2/json/repos/show%s/branches" \
                    % (u.netloc, re.sub(r'\.git$', '', u.path))
                json_data = json.loads(urlgrabber.urlopen(json_url).read())
                if isinstance(json_data, dict):
                    return json_data.get('branches', {}).get(branch, 'UNKNOWN')
                else:
                    logging.error("Unknown json data format: %s" % type(json_data))
            else:
                (rc, out) = call("git ls-remote %s refs/heads/%s" % (self.git_url, branch))
                if rc == 0:
                    out = out.strip() # yank off newline char
                    return out.split()[0]
                else:
                    logging.error("Unable to query repository: %s" % u)
        else:
            logging.error("Unhandled SCM format: %s" % u.scheme)
Example #6
def runMythTuner():
    #Yes this is bad, but the bindings can't be trusted.
    role=runMythRole()
    num_encoders = -1
    if role == "StandAlone" or role == "Master backend" or role == "Master backend with Frontend":
        fencoder = False
        url="http://localhost:6544"
        try:
            web_page = urlgrabber.urlopen(url).readlines()
        except:
            return 0
        num_encoders = 0
        for line in web_page:
            line = line.strip()
            if line == '<h2>Encoder status</h2>':
                fencoder= True
                continue
            if fencoder:
                #print line
                encoders = line.split('.<br />')
                for encoder in encoders:
                    if encoder.find("currently not connected") == -1 and encoder.startswith("Encoder"):
                        num_encoders = num_encoders + 1
                if line == '<div class="content">':
                    break
    return num_encoders
Example #7
    def setInstallData(self, anaconda):
        silvereye.InstallClass.setInstallData(self, anaconda)
        anaconda.id.firewall.portlist.extend([ '53:tcp',
                                               '53:udp',
                                               '67:udp',
                                               '3260:tcp',
                                               '8443:tcp',
                                               '8772:tcp',
                                               '8773:tcp',
                                               '8774:tcp',
                                               '8888:tcp'])

        if flags.cmdline.has_key("eucaconf"):
            try:
                f = urlgrabber.urlopen(flags.cmdline["eucaconf"])
                eucaconf = open('/tmp/eucalyptus.conf', 'w')
                eucaconf.write(f.read())
                f.close()
                eucaconf.close()
            except urlgrabber.grabber.URLGrabError as e:
                if anaconda.intf:
                    rc = anaconda.intf.messageWindow( _("Warning! eucalyptus.conf download failed"),
                                                      _("The following error was encountered while"
                                                        " downloading the eucalyptus.conf file:\n\n%s" % e),
                                   type="custom", custom_icon="warning",
                                   custom_buttons=[_("_Exit"), _("_Install anyway")])
                    if not rc:
                        sys.exit(0)
                else:
                    sys.exit(0)
        else:
            pass
Example #8
def f(idx, q,r):
    path = "data%s"%(idx)
    os.makedirs(path)
    while True:
        item = q.get()
        if( item.item_type == ITEM_QUIT ):
            break;

        count = 0
        localQueue = Queue()
        current = item.data
        while True:
            print current
            fo = urlopen(current)
            data = fo.read()
            name = "%s/%s"%(path,count)
            fw = open( name, "w" )
            count = count + 1
            fw.write(data)
            fw.close()
            fo.close()
            p = MyHTMLParser()
            try:
                p.feed(data)
            except:
                pass

            for href in p.hrefs:
                print item.data, ": ", href

            try:
                current = localQueue.get_nowait()
            except:
                break;
Example #9
    def get( self ):
        """
        Convert feed source (be it opml, RSS etc) into a list of dictionaries
        containing titles and urls. This list of dictionaries can then be used
        to regenerate the user config file.
        """
        #using urlgrabber so it doesn't matter whether feed is a file or a url
        logger.debug("Opening feed: " + self.feed)
        fd = urlopen( self.feed )
        feed = {}
        #is this an OPML file?
        try:
            outlines = OPML.parse( fd ).outlines
            logger.debug("Feed is OPML")
            for opmlfeed in outlines:
                feed = {}
                feed["title"] = opmlfeed["title"]
                feed["url"] = opmlfeed["xmlUrl"]
                self.feedlist.append( feed )
                logger.debug("Feed has been imported: %s - %s" % (feed["title"], feed["url"]))
        except Exception, e:
            feed = {}
            try:
                if self.title:
                    feed["title"] = self.title
                else:
                    outlines = feedparser.parse( self.feed )["feed"]
                    feed["title"] = outlines.title
                feed["url"] = self.feed
                self.feedlist.append(feed)
                logger.debug("Feed has been imported: %s - %s" % (feed["title"], feed["url"]))
            except Exception, e:
                print "Feedparser exception:", e
                sys.exit(-1)
Example #10
def preprocessKickstart (file):
    """Preprocess the kickstart file, given by the filename file.  This
        method is currently only useful for handling %ksappend lines,
        which need to be fetched before the real kickstart parser can be
        run.  Returns the location of the complete kickstart file.
    """
    try:
        fh = urlopen(file)
    except grabber.URLGrabError, e:
        raise IOError, formatErrorMsg(0, msg=_("Unable to open input kickstart file: %s") % e.strerror)
Example #11
def findUrl(arglist):
	# arglist[0] is the urlList to search (set() removes duplicates)
	this_urllist = set(arglist[0])
	# arglist[1] is the FE to find
	this_FE = arglist[1]
	result = []
	for this_url in this_urllist:
		soup = BeautifulSoup(urlopen(this_url))
		if soup.find_all(text = re.compile(this_FE)): result.append(this_url)
	return result
Example #12
def _read_locklist():
    locklist = []
    try:
        llfile = urlgrabber.urlopen(fileurl)
        for line in llfile.readlines():
            if line.startswith('#') or line.strip() == '':
                continue
            locklist.append(line.rstrip())
        llfile.close()
    except urlgrabber.grabber.URLGrabError, e:
        raise PluginYumExit('Unable to read version lock configuration: %s' % e)
Example #13
def getHtml(url, showUrl=False):
    if showUrl:
        logging.info(url)

    try:
        page = urlopen(url)
        html = page.read()
        page.close()
        return html
    except URLGrabError:
        logging.error('exce url:' + url)
    return ""
Example #14
def getHtml(url, showUrl=False):
    if showUrl:
        print(url)

    try:
        page = urlopen(url)
        html = page.read()
        page.close()
        return html
    except URLGrabError:
        print('exce url', url)
    return ""    
Example #15
    def __parse_jeos_images(self):
        log = logging.getLogger('%s.%s' % (__name__, self.__class__.__name__))
        config_urls = self.configuration['jeos_config']
        # Expand directories from the config and url-ify files
        # Read inlist - replace directories with their contents
        nextlist = []
        for path in config_urls:
            if os.path.isdir(path):
                for filename in os.listdir(path):
                    fullname = os.path.join(path, filename)
                    if os.path.isfile(fullname):
                        nextlist.append(fullname)
            else:
                nextlist.append(path)

        # Read nextlist - replace files with file:// URLs
        finalist = []
        for path in nextlist:
            if os.path.isfile(path):
                finalist.append("file://" + path)
            else:
                finalist.append(path)

        for url in finalist:
            try:
                filehandle = urlopen(str(url))
                line = filehandle.readline().strip()
            except:
                log.warning("Failed to open JEOS URL (%s)" % url)
                continue
            line_number = 1

            while line:
                # Lines that start with '#' are a comment
                if line[0] == "#":
                    pass
                # Lines that are zero length are whitespace
                elif len(line.split()) == 0:
                    pass
                else:
                    image_detail = line.split(":")
                    if len(image_detail) >= 6:
                        self.__add_jeos_image(image_detail)
                    else:
                        log.warning("Failed to parse line %d in JEOS config (%s):\n%s" % (line_number, url, line))

                line = filehandle.readline()
                line_number += 1

            filehandle.close()
Example #16
def get(url):
    mod_url = url
    if url.find(PACKAGE_PREFIX) == 0:
        mod_url = url[len(PACKAGE_PREFIX):]
        pos = mod_url.find('/')
        if pos == -1:
            raise Exception("Could not parse package:// format into file:// format for "+url)

        package = mod_url[0:pos]
        mod_url = mod_url[pos:]
        package_path = rospack_find(package)

        mod_url = "file://" + package_path + mod_url;

    return urlgrabber.urlopen(mod_url)	
Example #17
def findText(arglist):
	# arglist[0] is the urlList to search (set() removes duplicates)
	this_urllist = set(arglist[0])
	#arglist[1] is the FE to find
	this_FE = arglist[1]
	parents_visited = []
	result = []
	for this_url in this_urllist:
		soup = BeautifulSoup(urlopen(this_url))
		for this_tag in soup.find_all(text = re.compile(this_FE)):
			this_parent = this_tag.parent
			if this_parent in parents_visited: continue
			parents_visited.append(this_parent)
			this_text = ''
			for this_sibling in this_tag.parent.children: this_text += this_sibling.string
			result.append(this_text)
	return result
Example #18
def sanity_check_repodata(myurl):
    """
    Sanity check the repodata for a given repository.
    Initial implementation by Seth Vidal.
    """
    myurl = str(myurl)
    tempdir = tempfile.mkdtemp()
    errorstrings = []
    if myurl[-1] != '/':
        myurl += '/'
    baseurl = myurl
    if not myurl.endswith('repodata/'):
        myurl += 'repodata/'
    else:
        baseurl = baseurl.replace('repodata/', '/')

    rf = myurl + 'repomd.xml'
    try:
        rm = urlgrabber.urlopen(rf)
        repomd = repoMDObject.RepoMD('foo', rm)
        for t in repomd.fileTypes():
            data = repomd.getData(t)
            base, href = data.location
            if base:
                loc = base + '/' + href
            else:
                loc = baseurl + href

            destfn = tempdir + '/' + os.path.basename(href)
            dest = urlgrabber.urlgrab(loc, destfn)
            ctype, known_csum = data.checksum
            csum = checksum(ctype, dest)
            if csum != known_csum:
                errorstrings.append("checksum: %s" % t)

            if href.find('xml') != -1:
                decompressed = decompress(dest)
                retcode = subprocess.call(['/usr/bin/xmllint', '--noout', decompressed])
                if retcode != 0:
                    errorstrings.append("failed xml read: %s" % t)

    except urlgrabber.grabber.URLGrabError, e:
        errorstrings.append('Error accessing repository %s' % e)
Example #19
    def readKickstart(self, f, reset=True):
        """Process a kickstart file, given by the filename f."""
        if reset:
            self._reset()

        # an %include might not specify a full path.  if we don't try to figure
        # out what the path should have been, then we're unable to find it
        # requiring full path specification, though, sucks.  so let's make
        # the reading "smart" by keeping track of what the path is at each
        # include depth.
        if not os.path.exists(f):
            if self.currentdir.has_key(self._includeDepth - 1):
                if os.path.exists(os.path.join(self.currentdir[self._includeDepth - 1], f)):
                    f = os.path.join(self.currentdir[self._includeDepth - 1], f)

        cd = os.path.dirname(f)
        if not cd.startswith("/"):
            cd = os.path.abspath(cd)
        self.currentdir[self._includeDepth] = cd

        try:
            fh = urlopen(f)
        except grabber.URLGrabError, e:
            raise IOError, formatErrorMsg(0, msg=_("Unable to open input kickstart file: %s") % e.strerror)
Example #20
)

(options, args) = parser.parse_args()

if len(args) == 0:
    parser.error('One or more bundle keys are required')

progress_printer = ProgressPrint()
grabber = URLGrabber(prefix=options.gmb_url,
                     progress_obj=progress_printer)

# Download the albums for each key
for key in args:
    # Get download page and grab all download URLs
    download_page_url = urljoin(options.gmb_url, '/download?key=%s' % key)
    download_page = urlopen(download_page_url)
    html = download_page.read()
    soup = BeautifulSoup(html, 'lxml')
    download_page.close()

    # Find all download links
    regex_download_link = re.compile('/download\?.*')
    download_links = [x['href'] for x in soup.find_all('a', href=regex_download_link)]
    album_urls = merge_album_links(download_links)

    print 'Going to download %d album(s)' % len(album_urls)

    for url in album_urls.values():
        # Switch to output directory as urlgrabber downloads to the current dir
        os.chdir(options.output_dir)
Example #21
    def test_urlopen(self):
        "module-level urlopen() function"
        fo = urlgrabber.urlopen('http://www.python.org')
        fo.close()
Example #22
        print 'Archmage could not extract '+chmFile+', continuing from next loop iteration'
        nameStr='touch \''+chmFilePath+'!!!_ArchiveExtractFail\''
        os.system(nameStr)        
        continue #skip this file

    #mainPageFile=searchRes[1]['Main Page']
    if (not searchRes[0]):
        print 'pychm searched for ISBN: searchRes empty for '+chmFile+', continuing to next loop iteration'
        nameStr='touch \''+chmFilePath+'!!!_ISBNSearchFail\''
        os.system(nameStr)
        continue
    
    try:
        mainPageFile=searchRes[1].items()[0][1]
        mainPagePath=tempPath+mainPageFile
        page=urlgrabber.urlopen(mainPagePath)
    except Exception,e:
        mainPageFile=searchRes[1].items()[0][1]
        mainPageFile=mainPageFile.lower()
        mainPagePath=tempPath+mainPageFile
        page=urlgrabber.urlopen(mainPagePath)

    soup = BeautifulSoup(page)
    
    resSoup=soup.body.find(text=re.compile(r'ISBN'))
    
    #here, check to see how many characters come after 'ISBN'
    #if the number is more than 8, we likely are given the ISBN number in
    #resSoup.string, and so we should grab it
    ISBNStart=resSoup.string.find('ISBN')
    ISBNEnd=ISBNStart+len('ISBN')
Example #23
def index(req):
    rv = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>ECE 459: Programming for Performance</title>
<script type="text/javascript" src="/ui/alternate_rows.js"></script>
<link rel="stylesheet" type="text/css" href="alternating-rows.css" />
<style type="text/css">
  td {vertical-align:top}
</style>
<style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UWblank.css");
</style>
<!-- TemplateBeginIf cond="collage" -->
<!-- <style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UWhome.css");
</style>-->
<!-- TemplateEndIf --><!-- TemplateBeginIf cond="rightnavmenu==false" -->
<style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UW2col.css");
</style>
<!-- TemplateEndIf --><!-- TemplateBeginIf cond="rightnavmenu" -->
<!-- <style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UW3col.css");
</style> -->
<!-- TemplateEndIf -->
<!-- conditional comment added for IE 6 printing, IE 5.5 will not print this page very well -->
<!--[if IE 6]>
<style type="text/css" media="print">
@import url("http://www.uwaterloo.ca/css/UWprint.css");
</style>
<![endif]-->
<!-- this print will work in W3 Standard compliant browsers -->
<style type="text/css">
@import url("http://www.uwaterloo.ca/css/UWprint.css") print;

</style>
<style type="text/css" media="screen">
@import url("/css/ece.css");
</style>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta http-equiv="Content-Language" content="en-us" />
<!-- fill in below according to your site -->
<meta name="description" content="Programming for Performance" />
<meta name="keywords" content="programming parallelization" />
<meta name="author" content="Patrick Lam" />
<meta name="author" content="Design - Jesse Rodgers ([email protected])" />
<meta name="version" content="XHTML Version 1.0p1"  />
<!-- optional regions -->
<!-- TemplateParam name="submenu" type="text" value="0" -->
<!-- TemplateParam name="collage" type="boolean" value="false" -->
<!-- TemplateParam name="rightnavmenu" type="boolean" value="false" -->

</head>
<body>
<!-- header -->
<div id="header">
	<div id="clfbar">
		<div id="uwlogo">
			<a href="http://www.uwaterloo.ca">
            <img src="http://www.uwaterloo.ca/images/template/uwlogo.gif" alt="Link to the University of Waterloo home page" width="105" height="70" border="0" /> </a>
		</div>
		<div id="searchbox">

		<!-- form script is located on info, there are options though. Information is at http://web.uwaterloo.ca/clftemplate/search.html -->
         	<form action="http://info.uwaterloo.ca/clfscripts/uwsearch.php" method="post" name="search" target="_blank" id="search" title="search" dir="ltr" lang="en">
    			Search 
				<input type="radio" name="site" value="ece.uwaterloo.ca" checked="checked"/>
				in ECE
				<input type="radio" name="site" value="uwaterloo.ca"/>
    			all of UW
    			<input name="searchterm" type="text" id="searchterm" class="google" accesskey="s" tabindex="2" size="20" />  
				<input name="submit" type="submit" id="submit" class="google" tabindex="3" value="Search" />
  			</form>
        </div>

		<div id="wordmark"> 
		<h1><a href="index.html">
		<!-- replace title image with your own DO NOT FORGET ALT TAG!!! -->
		<img src="/files/clear.gif" alt="Department of Electrical and Computer Engineering" width="400" height="30" border="0" />
		</a></h1>
	  </div>
	</div>
</div>
<span class="none"><a href="#content">Skip to the content of the web site.</a></span>

<!-- primary nav, add or delete links as you desire --> 
<div id="primarynavarea"> 
  <ul id="primarynav"> 
  <li><a href="/p4p/"><b>Home</b></a></li>
  <li><a href="/p4p/leaderboard/leaders.py">A3 Leaderboard</a></li>
  <li><a href="/p4p/notes/">Lecture notes</a></li>
  <li><a href="/p4p/exams.shtml">Exam information</a></li>
  <li><a href="/p4p/files/assignment-01.pdf">Assignment 1 (PDF)</a></li>
  <li><a href="/p4p/files/assignment-02.pdf">Assignment 2 (PDF)</a></li>
  <li><a href="/p4p/files/assignment-03.pdf">Assignment 3 (PDF)</a></li>
  <li><a href="/p4p/files/assignment-04.pdf">Assignment 4 (PDF)</a></li>
<!--
  <li><a href="/p4p/a4notes.shtml">Assignment 4 notes</a></li>-->
<!-- TemplateEndIf --> 	  
  </ul>
</div>

<!-- content -->
<a name="content" id="content"></a>
<!-- TemplateBeginIf cond="collage == false" -->
<div id="contentbar"> <!-- this causes the warning about p tags on saving the template, just ignore -->
<!-- TemplateBeginEditable name="collage == false" -->
<h2> ECE459: Programming for Performance, W13 </h2> 
<!-- TemplateEndEditable -->
</div>
<!-- TemplateEndIf -->
													<!-- DO NOT FORGET ALT TAG!!!! -->
<!-- TemplateBeginIf cond="collage" -->

<!-- <div id="collage"> <img src="images/Collage3.jpg" alt=" " /> </div> -->
<!-- TemplateEndIf -->

<div id="primarycontarea">
  <div id="primarycontent">
    <!-- InstanceBeginEditable name="primarycontent" -->
    <table>
"""
    lb = urlopen("http://ece459-1.uwaterloo.ca/leaders.csv")
    reader = csv.reader(lb)
    parity = 0
    for row in reader:
        parity = 1-parity
        s = ""
        if (parity == 1):
            s = s + "background:#ddd"
        rv = rv + "<tr style='"+s+"'><td style='padding-right:1em'>"+row[0]+"</td><td style='text-align:right'>"+row[1]+"</td></tr>"

    lb.close()
    rv = rv + """</table>
</div>
</div>
<!-- footer -->
<div id="footer">	
	<div id="departmentaddress">
	 <a href="http://campaign.uwaterloo.ca"><img src="http://www.uwaterloo.ca/images/template/littlecampaignlogo.gif" alt="Campaign Waterloo" class="campaignlogo" /></a>
	 <p>

	    Patrick Lam <br />
Department of Electrical and Computer Engineering<br />
University of Waterloo<br />
200 University Avenue West<br />
Waterloo, Ontario, Canada N2L 3G1<br />
519 888 4567 ext. 36433 <br />
<br />
<a href="mailto:p.lam[at]ece.uwaterloo.ca">contact us</a> | <a href="mailto:p.lam[at]ece.uwaterloo.ca">give us feedback</a> | <a href="http://www.uwaterloo.ca">University of Waterloo Home Page </a><br />

	  </p>
  </div>
</div>
</body>

<!-- InstanceEnd --></html>
"""
    return rv
Example #24
def main():
    server = "http://plain.resources.ovirt.org"
    if len(sys.argv) != 3:
        print("Usage:")
        print(
            "{command} {job} {repo}".format(
                command=sys.argv[0],
                job=(
                    "http://jenkins.ovirt.org/view/Publishers/job/"
                    "publish_ovirt_rpms_nightly_3.5/73/console"
                ),
                repo="/repos/ovirt-3.5-pre",
            )
        )
        sys.exit(1)
    job = sys.argv[1]
    baseurl = sys.argv[2]

    u = urlgrabber.urlopen(job)
    content = u.read()
    u.close()

    required = []
    for line in content.splitlines():
        if line.find('SSH: put') != -1:
            filename = line[line.find('[')+1:line.find(']')]
            if filename not in required:
                required.append(filename)
                if filename.endswith('.tar.gz'):
                    required.append(filename + '.sig')

    print("------------------------------")
    print("Checking Jenkins jobs goodness")
    print("------------------------------")

    print("publisher job: %s" % job)
    print(
        "repository: {server}{baseurl}\n\n".format(
            server=server,
            baseurl=baseurl,
        )
    )

    m = re.compile(r'^(?P<package>([a-zA-Z0-9]+\-)+[0-9\.]+[_0-9a-zA-Z\.]*)')
    for filename in required:
        if filename.endswith('.src.rpm'):
            package = m.match(filename)
            if package is not None:
                tarball = package.groupdict()['package'] + ".tar.gz"
                if tarball not in required:
                    print(
                        (
                            "missing sources : {tarball}\n"
                            "for rpm: {rpm}\n"
                            "found:\n"
                        ).format(
                            tarball=tarball,
                            rpm=filename,
                        )
                    )
                    for x in required:
                        if (
                            x.startswith(package.groupdict()['package']) and
                            x.endswith('tar.gz')
                        ):
                            print(x)

    not_required = []
    queue = collections.deque()
    queue.append(
        "{server}{baseurl}".format(
            server=server,
            baseurl=baseurl,
        )
    )

    m = re.compile('href="([^"]*)"')

    print(
        "\n\n\n"
        "-------------------------------------------------------\n"
        "Checking expected repository content from publisher job\n"
        "-------------------------------------------------------\n"
    )

    while queue:
        newitem = queue.popleft()
        print("processing %s" % newitem)
        u = urlgrabber.urlopen(newitem)
        root = u.read()
        u.close()
        for x in m.findall(root):
            if not (
                x.startswith('?') or
                x.startswith('/')
            ):
                if (
                    x.endswith('.rpm') or
                    x.endswith('.iso') or
                    x.endswith('.exe') or
                    x.endswith('.gz') or
                    x.endswith('.sig') or
                    x.endswith('.bz2') or
                    x.endswith('.xml') or
                    x.endswith('.zip')
                ):
                    if x in required:
                        required.remove(x)
                    else:
                        not_required.append(x)
                else:
                    queue.append(
                        "{baseurl}/{x}".format(
                            baseurl=newitem,
                            x=x,
                        )
                    )

    print(
        "The following packages were in the publisher job and are "
        "missing in the repo:"
    )
    for x in required:
        print(x)
Example #25
    # comment out the next line to make exceptions non-fatal
    from exception import initExceptionHandling
    anaconda.mehConfig = initExceptionHandling(anaconda)

    # add our own additional signal handlers
    signal.signal(signal.SIGUSR2, lambda signum, frame: anaconda.dumpState())

    anaconda.setDispatch()

    # download and run Dogtail script
    if opts.dogtail:
       try:
           import urlgrabber

           try:
               fr = urlgrabber.urlopen(opts.dogtail)
           except urlgrabber.grabber.URLGrabError, e:
               log.error("Could not retrieve Dogtail script from %s.\nError was\n%s" % (opts.dogtail, e))
               fr = None
                           
           if fr:
               (fw, testcase) = mkstemp(prefix='testcase.py.', dir='/tmp')
               os.write(fw, fr.read())
               fr.close()
               os.close(fw)
               
               # download completed, run the test
               if not os.fork():
                   # we are in the child
                   os.chmod(testcase, 0o755)
                   os.execv(testcase, [testcase])
Example #26
File: io.py Project: kholia/pyrpm
 def open(self, unused_mode="r"):
     try:
         self.fd = urlgrabber.urlopen(self.source)
     except urlgrabber.grabber.URLGrabError, e:
         raise IOError, str(e)
Example #27
#use mimms?

from BeautifulSoup import BeautifulSoup
import re
import urlgrabber
import urllib
import urllib2

MSRpageURL="http://www.researchchannel.org/prog/displayinst.aspx?fID=880&pID=480"

MSRbaseURL="http://content.digitalwell.washington.edu/msr/external_release_talks_12_05_2005/"

UWCSE2007pageURL="http://www.researchchannel.org/prog/displayseries.aspx?path=1&fID=2318&pID=497"
UWCSE2008pageURL="http://www.researchchannel.org/prog/displayseries.aspx?path=1&fID=4946&pID=497"

page=urlgrabber.urlopen(MSRpageURL)
soup=BeautifulSoup(page)
lecResList=soup.findAll('a','bluelink')

for lecRes in lecResList:
    lecTitle=lecRes.contents[0]  
    print lecTitle
    
    lecInfoURL=lecRes.attrs[2][1]    
    lecPage=urlgrabber.urlopen(lecInfoURL)
    lecSoup=BeautifulSoup(lecPage)

    try:
        lecDate=lecSoup.findAll('span',{'id':'mediaGroupProductionDate'})[0].contents[0]
    except Exception, e:
        print e
Example #28
 titleCln=re.sub(r'Book #[\d]','',titleCln)
 authorCln=author.rstrip().strip('"').replace('Author: ','')
 authorCln=authorCln.replace('/',' ')
 
 
 titleQry=titleCln.replace(' ','+');
 authorQry=authorCln.replace(' ','+')
 queryStr=titleQry+'+'+authorQry
 titleSearchURL='http://books.google.com/books?client=firefox-a&um=1&q='+titleQry+'&btnG=Search+Books'
 advSearchURL='http://books.google.com/books?as_q=&num=10&client=firefox-a&btnG=Google+Search&as_epq=&as_oq=&as_eq=&as_libcat=0&as_brr=0&lr=&as_vt='+titleQry+'&as_auth='+authorQry+'&as_pub=&as_sub=&as_drrb=c&as_miny=&as_maxy=&as_isbn='
 basSearchURL='http://books.google.com/books?client=firefox-a&um=1&q='+queryStr+'&btnG=Search+Books'
 
 
 
 searchURL=advSearchURL;
 searchResPage=urlgrabber.urlopen(searchURL)
 searchResSoup=BeautifulSoup(searchResPage)
 
 bookLinkList=searchResSoup.find('h2','resbdy');
 
 if not bookLinkList:
     searchURL=basSearchURL;
     searchResPage=urlgrabber.urlopen(searchURL)
     searchResSoup=BeautifulSoup(searchResPage)
 
     bookLinkList=searchResSoup.find('h2','resbdy');
     
 if not bookLinkList:
     searchURL=titleSearchURL;
     searchResPage=urlgrabber.urlopen(searchURL)
     searchResSoup=BeautifulSoup(searchResPage)
Example #29
    def test_urlopen(self):
        "module-level urlopen() function"
        fo = urlgrabber.urlopen('http://www.python.org')
        fo.close()
Example #30
import urllib2
import string
import commands

from urlgrabber import urlopen

from bs4 import BeautifulSoup

import subprocess

mac_id = commands.getstatusoutput(
    "ethtool -P eth0 | awk -F \' \' \'{print $3}\'")
url = str('http://vps.sensorfaucets.com/stock_db/regester.php?mac='
          ) + mac_id[1] + str('&type=HUB')
html = urlopen(url).read()
parsed_html = BeautifulSoup(html, "html.parser")
print parsed_html.get_text()
#html.close()
url = str(
    'http://vps.sensorfaucets.com/stock_db/checking.php?mac=') + mac_id[1]

while 1:
    htmlc = urlopen(url).read()
    parsed_htmlc = BeautifulSoup(htmlc, "html.parser")
    s = parsed_htmlc.get_text()
    p = s[0:5] + s[26:50]
    print p
Example #31
def get(url):
    return urlgrabber.urlopen(get_filename(url))
Example #32
    # comment out the next line to make exceptions non-fatal
    from exception import initExceptionHandling
    anaconda.mehConfig = initExceptionHandling(anaconda)

    # add our own additional signal handlers
    signal.signal(signal.SIGUSR2, lambda signum, frame: anaconda.dumpState())

    anaconda.setDispatch()

    # download and run Dogtail script
    if opts.dogtail:
       try:
           import urlgrabber

           try:
               fr = urlgrabber.urlopen(opts.dogtail)
           except urlgrabber.grabber.URLGrabError, e:
               log.error("Could not retrieve Dogtail script from %s.\nError was\n%s" % (opts.dogtail, e))
               fr = None
                           
           if fr:
               (fw, testcase) = mkstemp(prefix='testcase.py.', dir='/tmp')
               os.write(fw, fr.read())
               fr.close()
               os.close(fw)
               
               # download completed, run the test
               if not os.fork():
                   # we are in the child
                   os.chmod(testcase, 0o755)
                   os.execv(testcase, [testcase])
Example #33
    def test_urlopen(self):
        "module-level urlopen() function"
        fo = urlgrabber.urlopen('http://abat.au.example.com')
        fo.close()