for base_url in base_urls: print "scraping %s" % base_url doc = parse(base_url).getroot() links = doc.cssselect('span.pl a') if links: for link in links: subdoc_url = link.get('href') subdoc = parse(subdoc_url).getroot() images = subdoc.cssselect('img#iwi') print "\t%s %d images" % (subdoc_url, len(images)) for image in images: img_url = image.get('src') if img_url not in images: print "\t\t%s" % (img_url) brew.publish('image', img_url) history.append( img_url ) else: print "No links found on "+base_url print 'Sent %d images' % len(history) time.sleep(2.5) # Catch ctrl+c except (KeyboardInterrupt, SystemExit): print("Exiting!") # closing out the app and returning terminal to old settings finally: print("Stopping spacebrew"); brew.stop()
# For any subscriber, you can define any number of functions
# that will get called with the sent value when a message arrives.
# Here's a simple example of a function that receives a value.
def example(value):
    """Print a received value together with its type.

    Registered below as the callback for the subscribers "sub1",
    "sub2" and "sub3" of brew2.
    """
    # A single pre-formatted argument prints the same text whether
    # `print` is the Python 2 statement or the Python 3 function.
    print("Got %s %s" % (value, type(value)))


# We call "subscribe" to associate a function with a subscriber.
for subscriber_name in ("sub1", "sub2", "sub3"):
    brew2.subscribe(subscriber_name, example)

# Calling start on a brew starts it running in a separate thread.
brew1.start()
brew2.start()

# We'll publish a value every three seconds. While this is running,
# go to your admin interface and connect the subscriber to the publisher
# to see the values.
try:
    while True:
        time.sleep(3)
        # The publish method sends a value from the specified
        # publisher.
        brew1.publish('pub', 'rub')
except (KeyboardInterrupt, SystemExit):
    # Ctrl+C or an explicit exit request is the normal way to end this
    # loop, so it is absorbed here instead of producing a traceback.
    pass
finally:
    # Calling stop on a brew disconnects it and waits for its
    # associated thread to finish. Doing it in `finally` ensures both
    # brews are stopped even when the loop dies from an unexpected error.
    brew1.stop()
    brew2.stop()