def __init__(self):
    """Create the MQTT subscriber: build a MAC-derived client id, wire up
    the module-level callbacks, connect to the broker, and open the
    repository used to store incoming data.
    """
    mac = get_mac()
    # BUG FIX: on Python 2 hex(long) appends a trailing 'L' (e.g. '0x1234L'),
    # so str(hex(mac))[2:] could yield a client id like '1234L-sub'.
    # '%x' formatting produces the bare lowercase hex digits in both cases.
    client_id = '%x-sub' % mac
    self.mqttc = mqtt.Client(client_id=client_id, userdata=self)
    # Assign all callbacks BEFORE connecting so none of the connection
    # lifecycle events can be dispatched while a handler slot is still empty.
    self.mqttc.on_connect = on_connect
    self.mqttc.on_disconnect = on_disconnect
    self.mqttc.on_subscribe = on_subscribe
    self.mqttc.on_unsubscribe = on_unsubscribe
    self.mqttc.on_message = on_message
    # NOTE(review): 'iot.eclipse.org' is the (now retired) public Eclipse
    # sandbox broker — confirm this endpoint is still the intended one.
    self.mqttc.connect('iot.eclipse.org')
    self.repo = Repository()
def main(): """ Method to run if executed as script. """ options, args = parse_options() if options.fetch: # get URLs and data directory = os.path.abspath(options.directory) if os.path.exists(directory) and os.listdir(directory): sys.exit("The target directory must be empty.") print "Determine number of images to fetch..." urls = get_urls(args, options.pages) number_of_images = [(tag, len(urls[tag])) for tag in urls] total = reduce(lambda x,y: x+y, [t[1] for t in number_of_images]) print "Fetching %i images (%s) into %s..." % (total, ', '.join(["%s: %i" % (tag, number) for tag, number in number_of_images]), directory) fetch_data(os.path.abspath(options.directory), urls, True) print "\nAll images fetched." elif options.analyze: # go to analyzer mode if not args: path = Repository.get_last() # open last fetched data else: path = args[0] directory = os.path.abspath(path) if not os.path.exists(directory): sys.exit("The target directoy must exist.") rep = Repository(directory) # load analyzer analyser = [get_class(m)(rep) for m in settings.ANALYZERS] _cmd = AnalyzerCmd(rep, analyser) _cmd.cmdloop("here we go...")
def fetch_data(dir, tags=None, print_progress=False, threads=50):
    """ Fetches the content of the URLs provided via tags into the
    directory specified by dir.

    INPUT:
    - dir:            The directory to store the data
    - tags:           A dictionary of tags, each containing a list of
                      tuples. See 'get_urls'.
    - print_progress: Show a progress bar
    - threads:        The maximum amount of threads that are started to
                      fetch the data.

    OUTPUT: None
    """
    # NOTE: the parameter name 'dir' shadows the builtin but is kept for
    # backward compatibility with keyword callers.
    if not tags:
        return
    repository = Repository(dir, new=True)
    repository.set_last()

    # Producer: start one FileGetter per URL and hand each running thread
    # to the consumer. The bounded queue (maxsize=threads) blocks put()
    # and thereby throttles how many fetcher threads run concurrently.
    def producer(queue, tags):
        for tag in tags:
            for id, page_url, image_url in tags[tag]:
                thread = FileGetter(id, tag, page_url, image_url)
                thread.start()
                queue.put(thread, True)

    # Consumer: join every fetcher thread to terminate it properly and
    # store its result in the repository. Failed fetches shrink the goal
    # instead of being counted.
    # (The original wrapped both workers in
    # 'try: ... except KeyboardInterrupt: raise' — a no-op, removed.)
    def consumer(queue, rep, total_files, print_progress):
        if print_progress:
            bar = ProgressBar(total_files, width=50)
        counter = 0
        while counter < total_files:  # run until all images are fetched
            thread = queue.get(True)
            thread.join()
            if thread.has_result:
                rep.add_site(thread.tag, thread.id, thread.page)
                rep.add_image(thread.tag, thread.id, thread.image)
                counter += 1
                if print_progress:
                    bar.add()
            else:
                # Fetch failed: lower the target and rebuild the bar so
                # the percentage stays meaningful.
                total_files -= 1
                if print_progress:
                    bar = ProgressBar(total_files, width=50)
                    bar.add(counter)
            # BUG FIX: guard against total_files == 0 (every fetch failed),
            # which previously raised ZeroDivisionError here.
            if print_progress and total_files > 0:
                sys.stdout.write("%i%% %r fetched %i of %i \r" % (
                    counter * 100 / total_files, bar, counter, total_files))
                sys.stdout.flush()

    q = Queue(threads)
    prod_thread = threading.Thread(target=producer, args=(q, tags))
    # sum() replaces the reduce(lambda x,y: x+y, ...) fold.
    total = sum(len(tags[tag]) for tag in tags)
    cons_thread = threading.Thread(target=consumer,
                                   args=(q, repository, total, print_progress))
    prod_thread.start()
    cons_thread.start()
    prod_thread.join()
    cons_thread.join()