Example #1
class CapturePlugin(BaseDronePlugin):
    def __init__(self, interfaces, channel, drone):
        BaseDronePlugin.__init__(self, interfaces, channel, drone, "CapturePlugin.{0}".format(channel))
        self.logutil.log("Initializing")
        # Select interface
        try:
            self.kb = self.interfaces[0]
            self.kb.set_channel(self.channel)
            self.kb.active = True
        except Exception as e:
            print("failed to use interface")
            self.status = False
        # Pipe from the tasker to the filter module, used to send pickled tasking dictionaries (simple DictManager)
        recv_pconn, recv_cconn = Pipe()
        task_pconn, self.task_cconn = Pipe()
        self.task_queue = JoinableQueue()
        # Start the filter up
        self.p_filt = FilterProcess(
            recv_pconn, self.task_queue, self.done_event, self.task_update_event, self.drone, self.name
        )
        self.p_filt.start()
        self.logutil.log("Launched FilterProcess ({0})".format(self.p_filt.pid))
        self.childprocesses.append(self.p_filt)
        # Start the receiver up
        self.p_recv = SnifferProcess(recv_cconn, self.kb, self.done_event, self.drone, self.name)
        self.p_recv.start()
        self.logutil.log("Launched SnifferProcess: ({0})".format(self.p_recv.pid))
        self.childprocesses.append(self.p_recv)

    def task(self, uuid, data):
        self.logutil.log("Adding Task: {0}".format(uuid))
        if uuid in self.tasks:
            return False
        self.tasks[uuid] = data
        self.__update_filter_tasking()
        return True

    def detask(self, uuid):
        res = None
        if uuid in self.tasks:
            res = self.tasks.get(uuid)
            del self.tasks[uuid]
        else:
            return False
        if len(self.tasks) == 0:
            # Time to shut the whole party down, as we don't have any more tasks
            self.logutil.log("No remaining tasks, shutting down plugin")
            self.shutdown()
            # TODO return something to indicate a total shutdown also
        else:
            # We made a change to tasking, let's implement it
            self.__update_filter_tasking()
        # return res
        return True

    def __update_filter_tasking(self):
        self.logutil.log("Sending Task Updates to FilterProcess")
        self.task_queue.put_nowait(cPickle.dumps(self.tasks))
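FilterProcess itself is not part of this excerpt. As a rough sketch (assuming Python 3, a hypothetical filter_worker function and a done Event rather than the project's real FilterProcess API), the receiving end of the pickled tasking queue could look like this:

import pickle
from multiprocessing import Event, JoinableQueue, Process
from queue import Empty

def filter_worker(task_queue, done_event):
    tasks = {}                                 # most recent tasking dictionary
    while not done_event.is_set():
        try:
            raw = task_queue.get(timeout=1)
        except Empty:
            continue
        tasks = pickle.loads(raw)              # the newest update replaces the old tasking
        task_queue.task_done()
        print("filter now tracking {0} task(s)".format(len(tasks)))

if __name__ == "__main__":
    q, done = JoinableQueue(), Event()
    p = Process(target=filter_worker, args=(q, done))
    p.start()
    q.put_nowait(pickle.dumps({"uuid-1": {"channel": 11}}))   # illustrative payload
    q.join()                                   # returns once the worker calls task_done()
    done.set()
    p.join()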
Example #2
def downloadFile(sourceUrl: str, inboundQueue: JoinableQueue):
    """[Stream file from sourceUrl and place into queue for consumption]

    Args:
        sourceUrl (str): [The url of the source data]
        inboundQueue (JoinableQueue): [A queue from the multiprocessing module]
    """
    streamingReadFromURL = requests.get(sourceUrl, stream=True)
    for chunk in streamingReadFromURL.iter_lines(chunk_size=65536):
        inboundQueue.put_nowait(chunk)
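downloadFile only fills the queue. A hedged sketch of the consuming side (consumeChunks and the None sentinel are assumptions for illustration, not part of the original code; the URL is a placeholder and requests must be imported for downloadFile itself):

from multiprocessing import JoinableQueue, Process

def consumeChunks(inboundQueue: JoinableQueue):
    while True:
        chunk = inboundQueue.get()
        if chunk is None:                 # sentinel: the producer is finished
            inboundQueue.task_done()
            break
        # ... process the chunk here (hypothetical step) ...
        inboundQueue.task_done()          # balance every get() so join() can return

if __name__ == "__main__":
    q = JoinableQueue()
    worker = Process(target=consumeChunks, args=(q,), daemon=True)
    worker.start()
    downloadFile("https://example.com/data.txt", q)   # producer defined above
    q.put_nowait(None)                    # tell the consumer there is no more data
    q.join()                              # wait until every chunk has been handled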
Example #3
def __run_chm_test_procs(mems, model, regions, ntasks, nthreads):
    """Starts ntasks processes running __run_chm_test_proc then calls __run_chm_test_parallel."""
    from multiprocessing import JoinableQueue, Process
    from time import sleep
    print("Running CHM test with %d task%s and %d thread%s per task" %
          (ntasks, 's' if ntasks > 1 else '', nthreads,
           's' if nthreads > 1 else ''))
    nthreads_full = ntasks * nthreads

    # Start the child processes
    q = JoinableQueue()
    args = (mems, model, nthreads, q)
    processes = [
        Process(target=__run_chm_test_proc, name="CHM-test-%d" % p, args=args)
        for p in xrange(ntasks)
    ]
    for p in processes:
        p.daemon = True
        p.start()
    sleep(0)

    # Run the CHM-test in parallel
    try:
        out = __run_chm_test_parallel(mems, model, regions, q, processes,
                                      nthreads_full)
    except:
        __clear_queue(q)
        __kill_processes(processes)
        raise

    # Tell all processes we are done and make sure they all actually terminate
    for _ in xrange(ntasks):
        q.put_nowait(None)
    q.close()
    q.join()
    q.join_thread()
    for p in processes:
        p.join()

    # Done! Return the output image
    return out
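__run_chm_test_proc is not shown in this excerpt. A minimal sketch of a worker loop compatible with the shutdown protocol above (one None sentinel per process, task_done for every get) might look like this, with the actual tile processing omitted:

def chm_test_worker(mems, model, nthreads, q):
    while True:
        item = q.get()
        if item is None:            # sentinel: no more work for this process
            q.task_done()
            break
        try:
            pass                    # process one work item here (omitted)
        finally:
            q.task_done()           # always balance the get() so q.join() returns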
Example #4
    parser.add_argument('-bf',
                        '--benchmark_freq',
                        help="How often to emit benchmark info",
                        type=int,
                        default=1000000)
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    file_queue = JoinableQueue()
    result_queue = JoinableQueue()

    date_after = None
    if arguments.after:
        date_after = datetime.datetime.strptime(arguments.after, "%Y")

    for file in arguments.infile:
        file_queue.put_nowait(file)

    for i in range(arguments.num_processes):
        file_queue.put_nowait('STOP')

    for i in range(arguments.num_processes):
        Process(target=wos_parser,
                args=(file_queue, result_queue, arguments.wos_only,
                      arguments.sample_rate, arguments.must_cite,
                      arguments.batch_size, date_after)).start()

    Process(target=pjk_writer,
            args=(result_queue, arguments.outfile,
                  arguments.benchmark_freq)).start()

    file_queue.join()
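wos_parser (the Process target above) is not shown. Judging only from the args tuple and the 'STOP' sentinels, a worker of roughly this shape would fit; the parsing itself is omitted and the signature is an assumption:

def wos_parser_sketch(file_queue, result_queue, wos_only, sample_rate,
                      must_cite, batch_size, date_after):
    while True:
        path = file_queue.get()
        if path == 'STOP':              # one sentinel per worker process
            file_queue.task_done()
            break
        # ... parse `path` and push record batches onto result_queue ...
        file_queue.task_done()          # lets file_queue.join() return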
Example #5
def main(factor=2):
    #E.G: if total cores is 2 , no of processes to be spawned is 2 * factor
    files_to_download = JoinableQueue()
    result_queue = JoinableQueue()
    time_taken = JoinableQueue()
    time_taken_to_read_from_queue = JoinableQueue()
    with open('downloads.txt', 'r') as f:
        for to_download in f:
            files_to_download.put_nowait(to_download.split('\n')[0])
    files_to_download_size = files_to_download.qsize()
    cores = cpu_count()
    no_of_processes = cores * factor
    for i in xrange(no_of_processes):
        files_to_download.put_nowait(None)
    jobs = []
    start = datetime.datetime.now()
    for name in xrange(no_of_processes):
        p = Process(target = download, args = (files_to_download, result_queue,\
                                time_taken, time_taken_to_read_from_queue,name))
        p.start()
        jobs.append(p)

    for job in jobs:
        job.join()
    print result_queue.qsize()
    total_downloaded_urls = 0
    try:
        while 1:
            r = result_queue.get_nowait()
            total_downloaded_urls += r

    except Empty:
        pass

    try:
        while 1:
            """
                locals() keeps track of all variable, functions, class etc.
                datetime object is different from int, one cannot perform 
                0 + datetime.datetime.now(), if when we access the queue which 
                contains time objects first time, total_time will be set to 
                first time 
            """
            if 'total_time' in locals():
                total_time += time_taken.get_nowait()
            else:
                total_time = time_taken.get_nowait()
    except Empty:
        print("{0} processes on {1} core machine took {2} time to download {3}\
              urls".format(no_of_processes, cores, total_time, \
                                          total_downloaded_urls))

    try:
        while 1:
            if 'queue_reading_time' in locals():
                queue_reading_time += time_taken_to_read_from_queue.get_nowait()
            else:
                queue_reading_time = time_taken_to_read_from_queue.get_nowait()
    except Empty:
        print("{0} processes on {1} core machine took {2} time to read {3}\
              urls from queue".format(no_of_processes, cores,queue_reading_time\
              ,files_to_download_size))
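The download() target used by main() is not included in this excerpt. A hedged sketch of a worker matching the queues and sentinels above (the use of requests and the per-URL bookkeeping are illustrative assumptions):

import datetime
import requests

def download(files_to_download, result_queue, time_taken,
             time_taken_to_read_from_queue, name):
    downloaded = 0
    while True:
        read_start = datetime.datetime.now()
        url = files_to_download.get()
        time_taken_to_read_from_queue.put_nowait(datetime.datetime.now() - read_start)
        if url is None:                     # sentinel: this worker is done
            break
        start = datetime.datetime.now()
        requests.get(url)                   # download and discard the body
        time_taken.put_nowait(datetime.datetime.now() - start)
        downloaded += 1
    result_queue.put_nowait(downloaded)     # summed by main() after join()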
Example #6
class QiubaiSpider(object):
    def __init__(self):
        self.url_pattern = 'https://www.qiushibaike.com/8hr/page/{}/'
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
        }
        # 1. Create the URL queue, the response (page) queue and the data queue (in __init__)
        self.url_queue = JoinableQueue()
        self.page_queue = JoinableQueue()
        self.data_queue = JoinableQueue()

    def add_url_to_queue(self):
        """把url添加到队列里"""
        for i in range(1, 14):
            url = self.url_pattern.format(i)
            self.url_queue.put_nowait(url)

    def add_page_to_queue(self):
        """从url队列中,取出url,发送请求,获取响应数据,把响应数据,放到响应队列中"""
        while True:
            url = self.url_queue.get()
            response = requests.get(url, headers=self.headers)
            if response.status_code != 200:
                # If the request failed, put the URL back into the URL queue
                self.url_queue.put(url)
            else:
                # Put the response data into the response queue
                self.page_queue.put(response.content.decode())
            # This URL has been handled, so mark the task as done
            self.url_queue.task_done()

    def add_date_to_queue(self):
        """从响应队列中取出响应数据,提取数据,把数据放到数据队列中"""
        while True:
            page = self.page_queue.get()
            element = etree.HTML(page)
            divs = element.xpath('//*[@id="content-left"]/div')
            # XPath extraction principle: split into groups first, then extract the content from each group
            data_list = []
            for div in divs:
                # Dictionary holding the data extracted from this item
                data = {}
                imgs = div.xpath('./div[1]/a[1]/img/@src')
                # @src is scheme-relative ('//...'), so prepend 'https:'
                data['header_img'] = 'https:' + imgs[0] if len(
                    imgs) != 0 else None

                data['name'] = self.get_first_element(
                    div.xpath('./div[1]/a[2]/h2/text()'))
                gender_class = div.xpath('./div[1]/div/@class')
                if len(gender_class) != 0:
                    data['gender'] = re.findall('articleGender (.+?)Icon',
                                                gender_class[0])[0]

                data['content'] = ''.join([
                    text.strip() for text in div.xpath('./a/div/span//text()')
                ])
                data['vote'] = self.get_first_element(
                    div.xpath('./div[2]/span[1]/i/text()'))
                data['comments'] = self.get_first_element(
                    div.xpath('./div[2]/span[2]/a/i/text()'))

                data_list.append(data)
            # Put the extracted data into the data queue
            self.data_queue.put(data_list)
            # This page task is finished
            self.page_queue.task_done()

    def get_first_element(self, lis):
        return lis[0].strip() if len(lis) != 0 else None

    def save_data(self):
        """保存数据"""
        while True:
            data_list = self.data_queue.get()
            with open('糗百_多进程版.jsonlines', 'a', encoding='utf8') as f:
                for data in data_list:
                    json.dump(data, f, ensure_ascii=False)
                    f.write('\n')
            # This data task is finished
            self.data_queue.task_done()

    def execute_task(self, task, count):
        """
        执行线程任务
        :param task: 任务函数
        :param count: 启动线程个数
        """
        for i in range(count):
            t = Process(target=task)
            t.daemon = True
            t.start()

    def run(self):

        self.execute_task(self.add_url_to_queue, 1)
        self.execute_task(self.add_page_to_queue, 2)
        self.execute_task(self.add_date_to_queue, 2)
        self.execute_task(self.save_data, 2)

        # If the program exits almost immediately when you run it, wait a little longer here
        time.sleep(1)
        # Make the main process wait until the task queues are fully processed
        self.url_queue.join()
        self.page_queue.join()
        self.data_queue.join()
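run() never joins its worker processes: they are daemons, and the main process instead joins the three queues, so it blocks until every queued item has been marked with task_done(), and the daemons are cleaned up when it exits. A standalone toy illustrating that termination pattern (not part of the spider):

from multiprocessing import JoinableQueue, Process

def worker(q):
    while True:
        item = q.get()
        print('handled', item)
        q.task_done()

if __name__ == '__main__':
    q = JoinableQueue()
    for i in range(5):
        q.put_nowait(i)
    p = Process(target=worker, args=(q,), daemon=True)
    p.start()
    q.join()    # returns once all five items are task_done()
    # the daemon worker is terminated automatically when the main process exits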
Example #7
class ImagesBatcher(AbstractDataBatcher):
    def __init__(
            self,
            queue_size,
            batch_size,
            data_sampler,
            image_processor=None,
            audio_processor=None,
            single_epoch=False,
            cache_data=False,  # TODO: implement me!
            disk_reader_process_num=1):
        """
        Class for creating sequence of data batches for training or validation.
        :param queue_size: queue size for Batch readers
        :param batch_size: size of batches generated
        :param data_sampler: knows how to sample batches from the dataset
        :param image_processor: image reading and preprocessing routine
        :param audio_processor: audio reading and preprocessing routine
        :param single_epoch: if enabled, the batcher finishes each epoch with a None batch
        :param cache_data: do we need to store all data in batcher memory?
        :param disk_reader_process_num: how many disk reader processes do we need?
        """
        super(AbstractDataBatcher, self).__init__()

        # set parameters
        self.batch_size = batch_size
        self.epoch_is_finished = False
        self.batch_queue_balance = 0
        if single_epoch:
            self.sampler_external_info = type('sampler_external_info',
                                              (object, ),
                                              dict(single_epoch=True))
        else:
            self.sampler_external_info = None

        # parse given dataset and init data sampler
        self.data_sampler = data_sampler

        # set queues
        if queue_size == -1:
            queue_size = self.data_sampler.dataset_size() // self.batch_size + 1
        self.task_queue = JoinableQueue(queue_size)
        self.batch_queue = JoinableQueue(queue_size)

        # create the batch disk readers (they are started in start())
        self.data_readers = []
        print('disk_reader_process_num:', disk_reader_process_num)
        for i in range(disk_reader_process_num):
            self.data_readers.append(
                (BatchDiskReader(self.task_queue, self.batch_queue,
                                 image_processor, audio_processor)))

    def start(self):
        self.epoch_is_finished = False

        # start batch disk readers
        for reader in self.data_readers:
            reader.start()

        # fill task queue with batches to start async reading from disk
        self.fill_task_queue()

    def fill_task_queue(self):
        try:
            while True:
                if not self.task_queue.full():
                    batch = self.data_sampler.sampling(
                        self.batch_size, self.sampler_external_info)
                    if batch is not None:
                        self.task_queue.put_nowait(batch)
                        self.batch_queue_balance += 1
                    else:
                        self.epoch_is_finished = True
                        break
                else:
                    break
        except Exception as e:  # e.g. queue.Full
            logger.error("ImagesBatcher: %s", e)

    def next_batch(self):
        """
        Returns next batch from data
        """
        if self.epoch_is_finished and self.batch_queue_balance == 0:
            self.epoch_is_finished = False
            self.fill_task_queue()
            return None

        batch = self.batch_queue.get(block=True)
        self.batch_queue.task_done()
        self.batch_queue_balance -= 1
        if not self.epoch_is_finished:
            # fill task queue
            self.fill_task_queue()
        return batch

    def update_sampler(self, target, logits, step, summary_writer):
        if hasattr(self.data_sampler, 'update'):
            labels = target.cpu().data.numpy()
            is_update_sampler = self.data_sampler.update(
                labels, logits, step, summary_writer)
        #if is_update_sampler:
        #    self.clear_queue()

    def clear_queue(self):
        try:
            while True:
                self.task_queue.get_nowait()
                self.task_queue.task_done()
        except Exception as e:
            pass
        try:
            while True:
                self.batch_queue.get_nowait()
                self.batch_queue.task_done()
        except Exception as e:
            pass
        self.fill_task_queue()

    def finish(self):
        for data_reader in self.data_readers:
            data_reader.deactivate()

        while not self.task_queue.empty():
            self.task_queue.get()
            self.task_queue.task_done()

        is_anybody_alive = [
            data_reader.is_alive() for data_reader in self.data_readers
        ].count(True) > 0
        while not self.batch_queue.empty() or is_anybody_alive:
            try:
                self.batch_queue.get(timeout=1)
                self.batch_queue.task_done()
                is_anybody_alive = [
                    data_reader.is_alive() for data_reader in self.data_readers
                ].count(True) > 0
            except Exception as e:
                pass

        self.task_queue.join()
        self.batch_queue.join()
        for data_reader in self.data_readers:
            data_reader.join()
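BatchDiskReader is not shown in this excerpt. A hedged sketch of a reader process compatible with the two queues and the deactivate()/is_alive() calls used above (the stop Event and the way a task is turned into a batch are assumptions for illustration):

from multiprocessing import Event, Process
from queue import Empty

class BatchDiskReaderSketch(Process):
    def __init__(self, task_queue, batch_queue, image_processor, audio_processor):
        super().__init__(daemon=True)
        self.task_queue = task_queue
        self.batch_queue = batch_queue
        self.image_processor = image_processor
        self.audio_processor = audio_processor
        self._stop_event = Event()            # shared flag visible to the child process

    def deactivate(self):
        self._stop_event.set()

    def run(self):
        while not self._stop_event.is_set():
            try:
                task = self.task_queue.get(timeout=1)   # one sampled batch description
            except Empty:
                continue
            processed = ([self.image_processor(s) for s in task]
                         if self.image_processor else task)
            self.batch_queue.put(processed)
            self.task_queue.task_done()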
Example #8
def fit_for_all(drop_non_countries=False):
    """Main function to perform fit for all countries."""
    ####################################################################
    # Read files
    train_df = pd.read_csv(TRAIN_FILE, encoding='cp1252',
                           index_col='Country Name').dropna(axis=0)
    test_df = pd.read_csv(TEST_FILE, encoding='cp1252',
                          index_col='Country Name').dropna(axis=0)

    # The test_df has one extra country. Line up train and test.
    test_df = test_df.loc[train_df.index]

    if drop_non_countries:
        train_df = train_df.drop(NON_COUNTRIES)
        test_df = test_df.drop(NON_COUNTRIES)

    # Get matrices.
    train_mat = train_df.values.T.astype(int)
    test_mat = test_df.values.T.astype(int)

    # Grab list and number of countries for convenience.
    countries = train_df.index.values
    num_countries = countries.shape[0]

    # Initialize queues for parallel processing.
    queue_in = JoinableQueue()
    queue_out = Queue()

    # Start processes.
    processes = []
    for i in range(NUM_PROCESSES):
        p = Process(target=fit_for_country_worker, args=(train_mat, test_mat,
                                                         queue_in, queue_out))
        p.start()
        processes.append(p)

    # Loop over all the countries (columns of the train matrix).
    for i in range(num_countries):
        # Put the country index and the total country count in the queue.
        queue_in.put((i, num_countries))

    # Wait for processing to finish.
    queue_in.join()

    # Track coefficients.
    best_coeff = pd.DataFrame(0.0, columns=countries, index=countries)

    # Track training scores.
    best_scores = pd.Series(0.0, index=countries)

    # Track predictions.
    predictions = pd.DataFrame(0.0, columns=test_df.columns, index=countries)

    # Map data.
    for _ in range(num_countries):
        # Grab data from the queue.
        other_countries, s, c, p = queue_out.get()

        country = countries[~other_countries][0]

        # Map.
        best_scores.loc[~other_countries] = s
        best_coeff.loc[other_countries, country] = c
        # p needs to be transformed (17x1 vs 1x17)
        predictions.loc[~other_countries, :] = p.T

    # Shut down processes.
    for p in processes:
        queue_in.put_nowait(None)
        p.terminate()

    predictions.transpose().to_csv(PRED_OUT, index_label='Id',
                                   encoding='cp1252')
    best_coeff.to_csv(COEFF_OUT, encoding='cp1252')

    # Print MSE
    print('Summary of MSE:')
    print(best_scores.describe())
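fit_for_country_worker is referenced but not shown; only the queue protocol can be inferred from the parent. A sketch under that assumption (the fitting step itself is a placeholder, and the returned score/coefficient/prediction values here are dummies):

import numpy as np

def fit_for_country_worker_sketch(train_mat, test_mat, queue_in, queue_out):
    while True:
        item = queue_in.get()
        if item is None:                      # sentinel sent during shutdown
            queue_in.task_done()
            break
        i, num_countries = item
        other_countries = np.ones(num_countries, dtype=bool)
        other_countries[i] = False            # True for every country except country i
        # ... fit country i's series from the other countries' columns here ...
        s = 0.0                               # dummy best training score
        c = np.zeros(num_countries - 1)       # dummy coefficients for the other countries
        p = test_mat[:, [i]]                  # dummy prediction column
        queue_out.put((other_countries, s, c, p))
        queue_in.task_done()                  # lets queue_in.join() return in the parent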
Example #9
class EWProtocol(BaseProtocol):
	"""
	Base class that contains shared functionality between the two proxies' comm protocols
	Data sent over is buffered and lz4 compressed
	"""
	def __init__(self, factory, buff_class, handle_direction, other_factory, buffer_wait):
		"""
		Protocol args:
			factory: factory that made this protocol (subclass of EWFactory)
			other_factory: the other factory that communicates with this protocol (in this case an instance of MCProtocol)
			buffer_wait: amount of time to wait before sending buffered packets (in ms)
		"""
		super().__init__(factory, buff_class, handle_direction, other_factory)
		self.buffer_wait = buffer_wait

		self.compressor_input_queue = JoinableQueue()
		self.compressor_output_queue = JoinableQueue()
		self.depressor_input_queue = JoinableQueue()
		self.depressor_output_queue = JoinableQueue()

		self.compression_handler = OutboxHandlerThread(self.compressor_output_queue, reactor.callFromThread, self.send_data)
		self.decompression_handler = OutboxHandlerThread(self.depressor_output_queue, reactor.callFromThread, super().dataReceived)

		self.compressors = []
		for x in range(COMP_THREADS):
			self.compressors.append(Compressor(self.compressor_input_queue, self.compressor_output_queue))

		self.depressors = []
		for x in range(DEP_THREADS):
			self.depressors.append(Depressor(self.depressor_input_queue, self.depressor_output_queue))

	def connectionMade(self):
		"""
		Called when a connection is made
		"""
		super().connectionMade()

		if self.factory.instance: # Only one protocol can exist
			self.transport.loseConnection()
			return

		self.factory.instance = self

		# Start compressor and depressor
		for x in self.compressors:
			x.start()
		for x in self.depressors:
			x.start()

		# Start handlers
		self.compression_handler.start()
		self.decompression_handler.start()

		# Run self.send_buffered_packets every self.buffer_wait ms
		reactor.callLater(self.buffer_wait/1000, self.send_buffered_packets)

	def connectionLost(self, reason):
		super().connectionLost(reason)

		# Remove factory instance
		self.factory.instance = None

		# Stop compressor and decompressor
		for x in self.compressors:
			x.terminate()

		for x in self.depressors:
			x.terminate()

		try:
			self.compression_handler.kill()
			self.decompression_handler.kill()
		except:
			pass
	
		# Stop handlers
		self.compression_handler.running = False
		self.decompression_handler.running = False
		self.compression_handler.join()
		self.decompression_handler.join()

	def dataReceived(self, data):
		"""
		Called by twisted when data is received over tcp by the protocol
		"""
		self.depressor_input_queue.put_nowait(data)

	def get_packet_name(self, id):
		"""
		Get packet name from id
		Meant to be overridden
		Args:
			id: id of the packet
		Returns:
			name: name of the packet
		"""
		try:
			info = packet_names[id]
		except KeyError:
			self.logger.error("No packet with id: {}".format(id))
			raise KeyError

		if self.handle_direction not in info[1]:
			self.logger.error("Wrong direction for packet id: {}".format(id))
			raise KeyError

		return info[0]

	def get_packet_id(self, name):
		"""
		Get packet id from name
		Meant to be overridden
		Args:
			name: name of the packet
		Returns:
			id: id of the packet
		"""
		try:
			info = packet_ids[name]
		except KeyError:
			self.logger.error("No packet with name: {}".format(name))
			raise KeyError

		if self.send_direction not in info[1]:
			self.logger.error("Wrong direction for packet name: {}".format(name))
			raise KeyError

		return info[0]

	def send_packet(self, name, *data):
		"""
		Sends an EW packet to the other proxy
		"""
		data = b"".join(data) # Combine data
		data = self.buff_class.pack_varint(self.get_packet_id(name)) + data # Prepend packet ID
		data = self.buff_class.pack_packet(data) # Pack data as a packet

		self.compressor_input_queue.put_nowait(data)

	def send_data(self, data):
		"""
		Callback for compressor
		"""
		self.transport.write(data)

	def send_buffered_packets(self):
		"""
		Sends all packets in self.input_buffer to the other proxy as a poem
		"""
		# Schedule the next call
		reactor.callLater(self.buffer_wait/1000, self.send_buffered_packets)

		if len(self.factory.input_buffer) < 1: # Do not send empty packets
			return

		data = []
		for i in range(len(self.factory.input_buffer)): # Per packet info
			uuid, packet_name, packet_data = self.factory.input_buffer.popleft()
			buff = packet_data.buff # We don't use read because we need the entire buffer's data

			data.append(self.buff_class.pack_uuid(uuid)) # Pack uuid of client
			# TODO: Pass the id instead of the string name to save bandwidth?
			buff = self.buff_class.pack_string(packet_name) + buff # Prepend packet name to buffer
			data.append(self.buff_class.pack_packet(buff)) # Append buffer as packet

			packet_data.discard() # Buffer is no longer needed

		# Send poem
		self.send_packet("poem", *data)

	def packet_poem(self, buff):
		"""
		Parses the poem and dispatches callouts with packet_mc_* callbacks
		Also forwards the packets afterwards
		"""
		data = []
		try:
			while True: # Unpack data until a BufferUnderrun
				uuid = buff.unpack_uuid()
				packet = buff.unpack_packet(self.buff_class) # Packet is unpacked here as the subclass will just forward it
				packet_name = packet.unpack_string()
				packet.save()
				data.append((uuid, packet_name, packet))
		except BufferUnderrun:
			pass

		buff.discard() # Discard when done

		# Dispatch calls
		for packet in data:
			try:
				new_packet = self.dispatch(("mc", packet[1]), packet[0], packet[2])
			except BufferUnderrun:
				self.logger.info("Packet is too short: {}".format(packet[1]))
				continue

			# If nothing was returned, the packet should be sent as it was originally
			if not new_packet:
				new_packet = packet

			# Forward packet
			if new_packet[2] is not None: # If the buffer is None, it was explicitly stated not to send the packet!
				try:
					self.other_factory.get_client(new_packet[0]).send_packet(new_packet[1], new_packet[2].buff)
				except KeyError:
					# The client has disconnected already, ignore
					pass
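The Compressor and Depressor classes fed by these queues are not shown. A rough sketch of the compressing side (lz4.frame is one plausible codec given the class docstring; the real implementation may differ):

import lz4.frame
from multiprocessing import Process

class CompressorSketch(Process):
    def __init__(self, input_queue, output_queue):
        super().__init__(daemon=True)
        self.input_queue = input_queue
        self.output_queue = output_queue

    def run(self):
        while True:
            data = self.input_queue.get()                        # packed packet bytes from send_packet()
            self.output_queue.put_nowait(lz4.frame.compress(data))  # drained by OutboxHandlerThread -> send_data
            self.input_queue.task_done()

A Depressor would mirror this with lz4.frame.decompress, its output handler feeding super().dataReceived via reactor.callFromThread.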
Example #10
    parser.add_argument('-n', '--num-processes', help="Number of subprocesses to start", default=4, type=int)
    parser.add_argument('-b', '--batch-size', help="Number of entries to batch prior to transmission", default=100, type=int)
    parser.add_argument('-a', '--after', help="Only include nodes published on or after this year")
    parser.add_argument('-bf', '--benchmark_freq', help="How often to emit benchmark info", type=int, default=1000000)
    parser.add_argument('infile', nargs='+')
    arguments = parser.parse_args()

    file_queue = JoinableQueue()
    result_queue = JoinableQueue()

    date_after = None
    if arguments.after:
        date_after = datetime.datetime.strptime(arguments.after, "%Y")

    for file in arguments.infile:
        file_queue.put_nowait(file)

    for i in range(arguments.num_processes):
        file_queue.put_nowait('STOP')

    for i in range(arguments.num_processes):
        Process(target=wos_parser, args=(file_queue,
                                         result_queue,
                                         arguments.wos_only,
                                         arguments.sample_rate,
                                         arguments.must_cite,
                                         arguments.batch_size,
                                         date_after)).start()

    Process(target=pjk_writer, args=(result_queue, arguments.outfile, arguments.benchmark_freq)).start()
Example #11
def main(factor=2):
    #E.G: if total cores is 2 , no of processes to be spawned is 2 * factor
    files_to_download = JoinableQueue()
    result_queue = JoinableQueue()
    time_taken = JoinableQueue()
    time_taken_to_read_from_queue = JoinableQueue()
    with open('downloads.txt', 'r') as f:
        for to_download in f:
            files_to_download.put_nowait(to_download.split('\n')[0])
    files_to_download_size = files_to_download.qsize()
    cores = cpu_count()
    no_of_processes = cores * factor
    for i in xrange(no_of_processes):
        files_to_download.put_nowait(None)
    jobs = []
    start = datetime.datetime.now()
    for name in xrange(no_of_processes):
        p = Process(target = download, args = (files_to_download, result_queue,\
                                time_taken, time_taken_to_read_from_queue,name))
        p.start()
        jobs.append(p)

    for job in jobs:
        job.join()
    print result_queue.qsize()
    total_downloaded_urls = 0
    try:
        while 1:
            r = result_queue.get_nowait()
            total_downloaded_urls += r

    except Empty:
        pass

    try:
        while 1:
            """
                locals() keeps track of all variable, functions, class etc.
                datetime object is different from int, one cannot perform 
                0 + datetime.datetime.now(), if when we access the queue which 
                contains time objects first time, total_time will be set to 
                first time 
            """
            if 'total_time' in locals():
                total_time += time_taken.get_nowait()
            else:
                total_time = time_taken.get_nowait()
    except Empty:
        print("{0} processes on {1} core machine took {2} time to download {3}\
              urls"                   .format(no_of_processes, cores, total_time, \
                                          total_downloaded_urls))

    try:
        while 1:
            if 'queue_reading_time' in locals():
                queue_reading_time += time_taken_to_read_from_queue.get_nowait()
            else:
                queue_reading_time = time_taken_to_read_from_queue.get_nowait()
    except Empty:
        print("{0} processes on {1} core machine took {2} time to read {3}\
              urls from queue"                              .format(no_of_processes, cores,queue_reading_time\
              ,files_to_download_size))
Example #12
class CapturePlugin(BaseDronePlugin):
    def __init__(self, interfaces, channel, drone):
        BaseDronePlugin.__init__(self, interfaces, channel, drone,
                                 'CapturePlugin.{0}'.format(channel))
        self.logutil.log('Initializing')
        # Select interface
        try:
            self.kb = self.interfaces[0]
            self.kb.set_channel(self.channel)
            self.kb.active = True
        except Exception as e:
            print("failed to use interface")
            self.status = False
        # Pipe from the tasker to the filter module, used to send pickled tasking dictionaries (simple DictManager)
        recv_pconn, recv_cconn = Pipe()
        task_pconn, self.task_cconn = Pipe()
        self.task_queue = JoinableQueue()
        # Start the filter up
        self.p_filt = FilterProcess(recv_pconn, self.task_queue,
                                    self.done_event, self.task_update_event,
                                    self.drone, self.name)
        self.p_filt.start()
        self.logutil.log('Launched FilterProcess ({0})'.format(
            self.p_filt.pid))
        self.childprocesses.append(self.p_filt)
        # Start the receiver up
        self.p_recv = SnifferProcess(recv_cconn, self.kb, self.done_event,
                                     self.drone, self.name)
        self.p_recv.start()
        self.logutil.log('Launched SnifferProcess: ({0})'.format(
            self.p_recv.pid))
        self.childprocesses.append(self.p_recv)

    def task(self, uuid, data):
        self.logutil.log('Adding Task: {0}'.format(uuid))
        if uuid in self.tasks:
            return False
        self.tasks[uuid] = data
        self.__update_filter_tasking()
        return True

    def detask(self, uuid):
        res = None
        if uuid in self.tasks:
            res = self.tasks.get(uuid)
            del self.tasks[uuid]
        else:
            return False
        if len(self.tasks) == 0:
            # Time to shut the whole party down, as we don't have any more tasks
            self.logutil.log('No remaining tasks, shutting down plugin')
            self.shutdown()
            #TODO return something to indicate a total shutdown also
        else:
            # We made a change to tasking, let's implement it
            self.__update_filter_tasking()
        #return res
        return True

    def __update_filter_tasking(self):
        self.logutil.log('Sending Task Updates to FilterProcess')
        self.task_queue.put_nowait(cPickle.dumps(self.tasks))