def __init__(self, sframe, feature_column, annotations_column,
             load_labels=True, shuffle=True, samples=None, buffer_size=256):
    """Start a background thread that streams rows from an SFrame.

    The constructor spawns a daemon worker that reads rows from a
    ``_SFrameDataSource`` and pushes them into an output buffer queue.
    Communication with the worker happens through ``buffer_reset_queue``:
    each message is itself a ``_Queue`` the worker should fill, and a
    ``None`` message tells the worker to exit.

    Parameters
    ----------
    sframe : SFrame-like
        Data to iterate over (passed straight to ``_SFrameDataSource``).
    feature_column : str
        Name of the column holding the feature (image) data.
    annotations_column : str
        Name of the column holding the annotations.
    load_labels : bool
        Forwarded to ``_SFrameDataSource``.
    shuffle : bool
        Forwarded to ``_SFrameDataSource``.
    samples : int or None
        Forwarded to ``_SFrameDataSource``; presumably limits the number
        of rows produced — TODO confirm against _SFrameDataSource.
    buffer_size : int
        Maximum number of rows buffered ahead of the consumer.
    """
    # This buffer_reset_queue will be used to communicate to the background
    # thread. Each "message" is itself a _Queue that the background thread
    # will use to communicate with us.
    buffer_reset_queue = _Queue()

    def worker():
        # The data source lives entirely on the worker thread; the main
        # thread only ever touches the queues.
        data_source = _SFrameDataSource(sframe, feature_column, annotations_column,
                                        load_labels=load_labels, shuffle=shuffle,
                                        samples=samples)
        while True:
            buffer = buffer_reset_queue.get()
            if buffer is None:
                break  # No more work to do, exit this thread.
            for row in data_source:
                buffer.put(row)
                # Check if we've been reset (or told to exit).
                if not buffer_reset_queue.empty():
                    break
            # Always end each output buffer with None to signal completion.
            buffer.put(None)
            data_source.reset()

    self.worker_thread = _Thread(target=worker)
    # Daemonize so a forgotten iterator cannot keep the process alive.
    self.worker_thread.daemon = True
    self.worker_thread.start()
    self.buffer_reset_queue = buffer_reset_queue
    self.buffer_size = buffer_size
    # Create the initial buffer and send it to the background thread, so
    # that it begins sending us annotated images.
    self.buffer = _Queue(self.buffer_size)
    self.buffer_reset_queue.put(self.buffer)
def reset(self):
    """Restart the stream from the beginning.

    Hands the background worker a brand-new output queue via
    ``buffer_reset_queue`` and drains the old queue, so a worker that is
    blocked trying to put into a full buffer can make progress and
    observe the reset request.
    """
    fresh_buffer = _Queue(self.buffer_size)
    self.buffer_reset_queue.put(fresh_buffer)
    # Drain the stale buffer; the worker always terminates a buffer with
    # None, so pulling items until we see None both unblocks the worker
    # and leaves the old queue empty.
    stale_buffer = self.buffer
    if stale_buffer is not None:
        while True:
            if stale_buffer.get() is None:
                break
    self.buffer = fresh_buffer
def _init_queue(split_text):
    """Initialize queue by first words from `split_text`.

    Parameters
    ----------
    split_text : list of str
        Splitted text.

    Returns
    -------
    Queue
        Initialized queue.
    """
    result = _Queue()
    # Seed with every word of the first window except the very first one.
    for token in _get_first_window(split_text)[1:]:
        result.put(token)
    return result
def _init_queue(split_text):
    """Build a queue seeded from the first window of `split_text`.

    Parameters
    ----------
    split_text : list of str
        Splitted text.

    Returns
    -------
    Queue
        Queue holding all words of the first window except the first.
    """
    seeded = _Queue()
    window = _get_first_window(split_text)
    # Skip the head word; enqueue the remainder of the window in order.
    remainder = window[1:]
    for word in remainder:
        seeded.put(word)
    return seeded
def extract_features(self, dataset, feature, batch_size=64, verbose=False):
    """Run the pretrained MXNet model over `dataset` and collect features.

    Uses a dedicated worker thread plus two FIFO queues to double-buffer
    batches, so data preparation and MXNet inference overlap.

    Parameters
    ----------
    dataset: SFrame
        SFrame of images
    feature : str
        Name of the image column to extract features from.
    batch_size : int
        Maximum batch size; clamped to ``len(dataset)``.
    verbose : bool
        If True, print progress as batches complete.

    Returns
    -------
    SArray
        One feature vector (array of doubles) per input row.
    """
    from ._mxnet._mx_sframe_iter import SFrameImageIter as _SFrameImageIter
    from six.moves.queue import Queue as _Queue
    from threading import Thread as _Thread
    import turicreate as _tc
    import array

    # Empty input: nothing to extract.
    if len(dataset) == 0:
        return _tc.SArray([], array.array)
    batch_size = min(len(dataset), batch_size)

    # Make a data iterator
    dataIter = _SFrameImageIter(sframe=dataset, data_field=[feature],
                                batch_size=batch_size,
                                image_shape=self.image_shape)

    # Setup the MXNet model
    model = MXFeatureExtractor._get_mx_module(self.ptModel.mxmodel,
                                              self.data_layer,
                                              self.feature_layer,
                                              self.context,
                                              self.image_shape,
                                              batch_size)

    out = _tc.SArrayBuilder(dtype=array.array)
    # Mutable dict so the nested consume_response closure can update it.
    progress = {'num_processed': 0, 'total': len(dataset)}
    if verbose:
        print("Performing feature extraction on resized images...")

    # Encapsulates the work done by the MXNet model for a single batch
    def handle_request(batch):
        model.forward(batch)
        mx_out = [array.array('d', m)
                  for m in model.get_outputs()[0].asnumpy()]
        if batch.pad != 0:
            # If batch size is not evenly divisible by the length, it will
            # loop back around. We don't want that.
            mx_out = mx_out[:-batch.pad]
        return mx_out

    # Copies the output from MXNet into the SArrayBuilder and emits progress
    def consume_response(mx_out):
        out.append_multiple(mx_out)
        progress['num_processed'] += len(mx_out)
        if verbose:
            print('Completed {num_processed:{width}d}/{total:{width}d}'.
                  format(width=len(str(progress['total'])), **progress))

    # Create a dedicated thread for performing MXNet work, using two FIFO
    # queues for communication back and forth with this thread, with the
    # goal of keeping MXNet busy throughout.
    request_queue = _Queue()
    response_queue = _Queue()

    def mx_worker():
        while True:
            batch = request_queue.get()  # Consume request
            if batch is None:
                # No more work remains. Allow the thread to finish.
                return
            response_queue.put(handle_request(batch))  # Produce response

    mx_worker_thread = _Thread(target=mx_worker)
    mx_worker_thread.start()

    try:
        # Attempt to have two requests in progress at any one time (double
        # buffering), so that the iterator is creating one batch while MXNet
        # performs inference on the other.
        if dataIter.has_next:
            request_queue.put(next(dataIter))  # Produce request
        while dataIter.has_next:
            request_queue.put(next(dataIter))  # Produce request
            consume_response(response_queue.get())
        # Collect the response for the final in-flight request. (The early
        # return above guarantees at least one request was produced.)
        consume_response(response_queue.get())
    finally:
        # Tell the worker thread to shut down.
        request_queue.put(None)

    return out.close()