def __init__(self, obu_idx, total_obu_num, GLOBAL_ID_OBU_i_PK_i_map):
    """Set up one OBU: identity, key values, semaphores and a UDP socket.

    Args:
        obu_idx: Index of this OBU; its string form names the logger and it
            is passed to ``get_ID_obu_i`` to obtain the OBU ID.
        total_obu_num: Stored unchanged as ``total_num``.
        GLOBAL_ID_OBU_i_PK_i_map: Mapping keyed by OBU ID. This OBU's
            ``PK_i`` is written into it and a reference to it is kept.
    """
    super().__init__()

    # ---- identity ----
    self.idx = str(obu_idx)
    self.total_num = total_obu_num
    self.ID_OBU_i = get_ID_obu_i(obu_idx)
    self.logger = get_logger("OBU_" + self.idx)

    # ---- generated attributes ----
    # NOTE(review): self.P and self.s are not assigned here; presumably they
    # come from the base class or are class attributes -- confirm.
    self.r_i = random.randint(1, self.P)
    key_values = get_x_i_y_i_PK_i(self.P, self.ID_OBU_i, self.s)
    self.x_i, self.y_i, self.PK_i = key_values

    # Publish this OBU's PK_i in the shared map and keep a handle to the map.
    self.GLOBAL_ID_OBU_i_PK_i_map = GLOBAL_ID_OBU_i_PK_i_map
    self.GLOBAL_ID_OBU_i_PK_i_map[self.ID_OBU_i] = self.PK_i

    self.R_i = self.r_i * self.P
    self.Ri_map = {}
    self.R = 0
    self.logger.debug(
        "x_i: {}, y_i: {}, PK_i: {}, R_i: {}".format(
            self.x_i, self.y_i, self.PK_i, self.R_i))

    # ---- semaphore definitions (both start at zero) ----
    self.sem_r_receive = Semaphore(0)
    self.sem_rsp_msg = Semaphore(0)

    # ---- network settings: UDP socket with port reuse and broadcast ----
    self.send_socket = sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    for option in (socket.SO_REUSEPORT, socket.SO_BROADCAST):
        sock.setsockopt(socket.SOL_SOCKET, option, 1)

    # ---- response packet (nothing received yet) ----
    self.rsp_msg = None
def start(self, test_q: JoinableQueue, result_q: Queue) -> None:
    """
    Launch every worker process plus the process that runs ``_process_test_q``.

    :param test_q: queue passed to ``_process_test_q`` together with a freshly
        created local queue
    :param result_q: queue handed to every worker session
    """
    worker_q = JoinableQueue()
    self._node_manager = Node.Manager(as_main=True, port=self.__class__._node_port)
    start_sem = Semaphore(self._max_simultaneous_connections)
    # Throttles the number of simultaneous connections made by distributed
    # calls that fetch node-level and global-level fixtures; multiprocessing
    # can otherwise hang on these calls when overwhelmed.
    fixture_sem = Semaphore(self._max_simultaneous_connections)
    for worker_index in range(self._num_processes):
        session = WorkerSession.start(
            worker_index,
            self._host,
            self._port,
            start_sem,
            fixture_sem,
            worker_q,
            result_q,
            self._node_port,
        )
        self._worker_procs.append(session)
        # NOTE(review): presumably pairs with an acquire inside
        # WorkerSession.start -- confirm.
        start_sem.release()
    feeder = Process(target=self._process_test_q, args=(test_q, worker_q))
    self._test_q_process = feeder
    feeder.start()
def __init__(self, n, timeout=None):
    """Create the shared counter and the three semaphores used by this object.

    Args:
        n: Stored as ``self.n``.
        timeout: Stored as ``self.to``; ``None`` by default.

    ``count`` is a shared integer starting at 0. ``mutex`` and ``turnstile2``
    start with one permit, ``turnstile1`` starts with none.
    NOTE(review): the names suggest a two-turnstile reusable barrier for
    ``n`` participants -- confirm against the methods that use them.
    """
    self.n, self.to = n, timeout
    self.count = Value('i', 0)
    self.mutex, self.turnstile1, self.turnstile2 = (
        Semaphore(permits) for permits in (1, 0, 1))
def __init__(self, Files, mode, t, threads):
    """
    PZip constructor: runs the zip or unzip workers and prints a summary.

    Requires: Files is a list of strings; mode is a string, where 'c' selects
    self.zip and any other value selects self.unzip; t is a boolean;
    threads is indexable and threads[0] is the requested number of threads
    (capped at len(Files)).
    Ensures: every worker thread has been started and joined, and one summary
    line with the global totalFiles counter has been printed.
    """
    global pointer, totalFiles, error_flag

    self.files = Files
    self.t = t
    self.sem = Semaphore(1)
    self.totalFilesSem = Semaphore(1)

    # Module-level state is reset before any worker starts.
    pointer = 0
    totalFiles = 0
    error_flag = False

    compressing = mode == 'c'
    n_workers = min(threads[0], len(Files))
    workers = [
        Thread(target=(self.zip if compressing else self.unzip))
        for _ in range(n_workers)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    verb = "comprimidos" if compressing else "descomprimidos"
    # Joining with single spaces gives the same line as the comma-separated
    # print statement, on both Python 2 and Python 3.
    print(" ".join(["Foram", verb, str(totalFiles), "ficheiros."]))
def __init__(self, no_cuda, model_dir, model_file, decider, threshold,
             entities, decider_processes, classifier_processes, batch_size):
    """Store the configuration and build the processing sequence.

    Every argument is kept unchanged on a private attribute of the same name
    (``no_cuda`` -> ``_no_cuda`` and so on). The constructor also creates an
    empty process queue with its semaphores, two fixed numeric grids, and
    finally calls ``self.process_sequence()``.
    """
    # Model / classifier configuration.
    self._no_cuda = no_cuda
    self._model_dir = model_dir
    self._model_file = model_file
    self._batch_size = batch_size

    # Decision configuration.
    self._decider = decider
    self._threshold = threshold
    self._entities = entities

    # Parallelism configuration.
    self._decider_processes = decider_processes
    self._classifier_processes = classifier_processes

    # Queue bookkeeping: the queue semaphore starts empty, the main one free.
    self._process_queue = []
    self._process_queue_sem = Semaphore(0)
    self._main_sem = Semaphore(1)

    # 100 evenly spaced values in [0.001, 0.1] and 10 in [0.1, 1].
    self._rank_intervalls = np.linspace(0.001, 0.1, 100)
    self._quantiles = np.linspace(0.1, 1, 10)
    self._return_full = False

    # Must stay last: process_sequence() may read anything assigned above.
    self._sequence = self.process_sequence()
def __init__(self, result_sequence=None, min_level=2, name="JobQueue",
             verbose=False, feeder_queue=None, limit=None):
    """Initialise an empty job queue with ``min_level`` priority levels.

    Args:
        result_sequence: Stored as given. When it is not ``None`` two extra
            semaphores (``_next_call_sem`` at 0, ``_execution_sem`` at 1) are
            created; otherwise both attributes are ``None``.
        min_level: Number of priority levels, numbered ``0 .. min_level-1``.
        name: Stored as ``_name``.
        verbose: Stored as ``_verbose``.
        feeder_queue: Stored as ``_feeder_queue``.
        limit: ``None`` for no limit, otherwise a value clamped to at least 1
            and used as the initial count of one semaphore per level.
    """
    if limit is not None:
        limit = max(1, limit)
    has_result_sequence = result_sequence is not None
    levels = list(range(min_level))

    self._name = name
    self._verbose = verbose
    self._feeder_queue = feeder_queue
    self._result_sequence = result_sequence

    self._next_call_sem = Semaphore(0) if has_result_sequence else None
    self._execution_sem = Semaphore(1) if has_result_sequence else None

    self._prio_levels = levels
    self._priorities = {level: [] for level in levels}

    self._main_sem = Semaphore(1)
    self._job_counter = 0
    self._process_queue = {}
    self._process_queue_sem = Semaphore(0)

    if limit is None:
        self._limit_sem = None
    else:
        self._limit_sem = [Semaphore(limit) for _ in levels]
def prog_proc(cam_index: int, running: Value):
    """Run the camera pipeline for one camera under cProfile.

    Starts ``NUM_CAM_PROCS`` ``image_processor`` threads, one ``cam_reader``
    thread and one ``vid_disp`` thread (all daemon threads), then polls
    ``running.value`` once per second until it becomes 0. After a further
    five seconds the profile is written to
    ``profile_outdir + "test_shm_proc_<cam_index>.prof"``.

    Args:
        cam_index: Camera index; passed to every thread and used in the
            profile file name.
        running: Shared value; the function returns once it reads 0.
    """
    shm_shape = (1080, 1920, 3)
    shm_len = shm_shape[0] * shm_shape[1] * shm_shape[2]
    image_dim = (IMG_SIZE[1], IMG_SIZE[0], 3)
    out_name = profile_outdir + "test_shm_proc_" + str(cam_index) + ".prof"

    sems1, sems2, sems3 = [], [], []
    shm_arrs, shm_ovl_arrs = [], []
    image_processors = []

    pflr = cProfile.Profile()
    pflr.enable()

    for _ in range(NUM_CAM_PROCS):
        # One set of semaphores and shared buffers per processor thread.
        sem_a, sem_b, sem_c = Semaphore(0), Semaphore(0), Semaphore(1)
        overlay_buf = Array(c_char, BYTESTR_SIZE)
        frame_buf = Array('i', shm_len)
        sems1.append(sem_a)
        sems2.append(sem_b)
        sems3.append(sem_c)
        shm_ovl_arrs.append(overlay_buf)
        shm_arrs.append(frame_buf)

        worker = Thread(
            target=image_processor,
            daemon=True,
            args=(cam_index, frame_buf, image_dim, sem_a, sem_b,
                  overlay_buf, running))
        image_processors.append(worker)
        worker.start()

    cam = Thread(
        target=cam_reader,
        daemon=True,
        args=(cam_index, shm_arrs, image_dim, sems1, sems3, shm_ovl_arrs,
              running))
    cam.start()

    disp = Thread(
        target=vid_disp,
        daemon=True,
        args=(cam_index, shm_arrs, image_dim, sems2, sems3, running))
    disp.start()

    # Idle until the shared flag is cleared, then allow five more seconds
    # before the profile is dumped.
    while running.value != 0:
        sleep(1)
    sleep(5)

    pflr.disable()
    pflr.dump_stats(out_name)
def start(self):
    """Initialise the generator if it supports it, then spawn all processes.

    One process runs ``self.monitor`` on the input channel; ``childCount``
    processes (or ``cpu_count()`` when the configured count is -1) run
    ``self.loadBatch`` on the input and batch channels.
    """
    if hasattr(self.__generator, 'init'):
        self.__generator.init()
    self.__status.value = True

    def new_channel():
        # A queue plus the two counting semaphores and the lock guarding it.
        return {
            "q": Queue(),
            "emptySemaphore": Semaphore(value=0),
            "fullSemaphore": Semaphore(value=self.__size),
            "mutex": Lock()
        }

    self.__inputSM = new_channel()
    self.__batchSM = new_channel()

    if self.__childCount != -1:
        n_children = self.__childCount
    else:
        n_children = cpu_count()

    self.__childProcess = []
    self.__mProcess = Process(target=self.monitor,
                              args=(self.__status, self.__inputSM))
    self.__mProcess.start()

    for _ in range(n_children):
        child = Process(target=self.loadBatch,
                        args=(self.__status, self.__inputSM, self.__batchSM))
        child.start()
        self.__childProcess.append(child)
def __init__(self, size, max_seq_length, tokenizer, ned_sql_file,
             entities_file, embeddings, n_trees, distance_measure,
             entity_index_path, search_k, max_dist, sentence_subset=None,
             bad_count=10, lookup_processes=0, pairing_processes=0):
    """Store the configuration, load the entities table and build the sequence.

    Each argument is kept on a private attribute of the same name. The
    entities table is read with ``pd.read_pickle(entities_file)``.
    ``self.get_features()`` is called part-way through, before the counter,
    the process counts and the two semaphores are assigned.
    """
    # Sizes and tokenisation.
    self._size = size
    self._max_seq_length = max_seq_length
    self._tokenizer = tokenizer

    # Data sources.
    self._ned_sql_file = ned_sql_file
    self._entities_file = entities_file
    self._entities = pd.read_pickle(entities_file)
    self._embeddings = embeddings
    self._sentence_subset = sentence_subset

    # Index / search parameters.
    self._n_trees = n_trees
    self._distance_measure = distance_measure
    self._entity_index_path = entity_index_path
    self._search_k = search_k
    self._max_dist = max_dist

    self._bad_count = bad_count
    self._max_bad_count = 50

    # Kept at this position on purpose: the attributes below are assigned
    # only after get_features() has been called, as in the original order.
    self._sequence = self.get_features()

    self._counter = 0
    self._lookup_processes = lookup_processes
    self._pairing_processes = pairing_processes
    self._lookup_sem = Semaphore(100)
    self._convert_sem = Semaphore(1000)
def generator(self, *args, **kwargs):
    """
    Wrap ``self.datagen.generator`` so its batches are produced by a worker process.

    A worker process running ``self.task`` is started with the wrapped
    generator, a buffer key and two semaphores: one initialised to
    ``self.buff_size`` and one counting batches available to the consumer.
    This generator then yields batches taken from the shared list stored
    under that key.

    All positional and keyword arguments are forwarded to
    ``self.datagen.generator``.
    """
    # Semaphores: batches ready to consume / room left in the buffer.
    ready = Semaphore(value=0)
    room = Semaphore(value=self.buff_size)

    # Each wrapped generator gets its own numbered buffer.
    key = str(self.gen_num)
    self.gen_num += 1
    self.batch_list[key] = self.manager.list()

    # Hand the real generator to a worker process.
    source = self.datagen.generator(*args, **kwargs)
    producer = Process(target=self.task, args=(source, key, room, ready))
    producer.start()

    # NOTE(review): pop() takes the most recently appended batch, so batches
    # may be yielded out of production order; and this loop never ends, so it
    # would block on `ready` once the source is exhausted -- confirm intended.
    while True:
        ready.acquire(block=True)
        batch = self.batch_list[key].pop()
        room.release()
        yield batch
def _get_locks(name):
    """Return the lock record registered under ``name``, creating it on first use.

    The record is a dict with three semaphores (``'access'``, ``'readers'``,
    ``'order'``, each starting with one permit) and an integer ``'rds'``
    starting at 0. Lookup and creation happen while holding the module-level
    ``_locker``, and the same dict object is returned on every later call
    for that name.

    The semaphores are built only when the name is new. Passing a fresh dict
    to ``setdefault`` would construct three semaphores on every call and
    throw them away whenever the entry already exists.
    """
    with _locker:
        locks = _rw_locks.get(name)
        if locks is None:
            locks = {
                'access': Semaphore(1),
                'readers': Semaphore(1),
                'order': Semaphore(1),
                'rds': 0
            }
            _rw_locks[name] = locks
        return locks
def main():
    """Wire up the classification pipeline, start it and time each result.

    Builds the preprocessing, SVM, RF, mixed-classifier and Kinect streamer
    processes connected by queues, installs a SIGINT handler that forwards
    to ``signal_handler``, starts every process, and then prints the time
    spent waiting for each mixed result until ``term_queue`` is non-empty.
    """
    # The two classifiers are given each other's semaphore to release.
    run_svm, run_rf = Semaphore(), Semaphore()
    (inp_rf, inp_svm, out_rf, out_svm,
     kin_stream, res_mixed, term_queue) = (Queue() for _ in range(7))

    processes = {}
    processes['preproc'] = Preprocessing(
        term_queue,
        input_queues=kin_stream,
        output_queues=[inp_rf, inp_svm],
        name='preproc')
    processes['svm_class'] = Classifier(
        term_queue,
        cs.ACTIONS_CLASSIFIER_SIMPLE,
        input_queues=inp_svm,
        output_queues=out_svm,
        sema_to_acquire=run_svm,
        sema_to_release=run_rf,
        name='svm_class')
    processes['rf_class'] = Classifier(
        term_queue,
        cs.POSES_CLASSIFIER,
        input_queues=inp_rf,
        output_queues=out_rf,
        sema_to_acquire=run_rf,
        sema_to_release=run_svm,
        name='rf_class')

    mixer = p2a.MixedClassifier(
        cs.ACTIONS_CLASSIFIER_SIMPLE,
        cs.POSES_CLASSIFIER,
        add_info='without sparse coding')
    mixer.run_training()
    processes['mixed_class'] = Classifier(
        term_queue,
        mixer,
        function=mixer.run_mixer,
        input_queues=[out_rf, out_svm],
        output_queues=res_mixed,
        name='mixed_class')
    processes['mixed_class'].setparam(just_scores=False)
    processes['stream_proc'] = KinectStreamer(term_queue, kin_stream)

    signal.signal(
        signal.SIGINT,
        lambda sig, frame: signal_handler(sig, frame, term_queue, processes))

    for key in processes:
        processes[key].start()

    while True:
        time1 = time.time()
        if not term_queue.empty():
            for key in processes:
                processes[key].join()
            break
        # The result itself is not used; only the waiting time is reported.
        res_mixed.get()
        print(time.time() - time1)
def _get_locks(name):
    """Return the lock record registered under ``name``, creating it on first use.

    The record is a dict with five semaphores (``'mutex1'``, ``'mutex2'``,
    ``'mutex3'``, ``'r'``, ``'w'``, each starting with one permit) and two
    integer counters (``'rcnt'``, ``'wcnt'``) starting at 0. Lookup and
    creation happen while holding the module-level ``_locker``, and the same
    dict object is returned on every later call for that name.

    The semaphores are built only when the name is new. Passing a fresh dict
    to ``setdefault`` would construct five semaphores on every call and
    throw them away whenever the entry already exists.
    """
    with _locker:
        locks = _rw_locks.get(name)
        if locks is None:
            locks = {
                'mutex1': Semaphore(1),
                'mutex2': Semaphore(1),
                'mutex3': Semaphore(1),
                'r': Semaphore(1),
                'w': Semaphore(1),
                'rcnt': 0,
                'wcnt': 0
            }
            _rw_locks[name] = locks
        return locks
def __init__(self, dataset, batch_size, batchifier, pool, shuffle=False,
             use_padding=False, no_semaphore=20):
    """Filter the dataset, split it into batches and set up the parallel iterator.

    Args:
        dataset: Object whose ``get_data()`` returns the games.
        batch_size: Number of games per batch.
        batchifier: Provides ``filter(games)`` and ``apply(batch)``.
        pool: Pool whose ``imap`` applies ``batchifier.apply`` to each batch.
        shuffle: Shuffle the filtered games in place when true.
        use_padding: Forwarded to ``split_batch``.
        no_semaphore: Initial count of the semaphore given to ``sem_iterator``.

    Debug information is printed along the way. The shape line reports a
    first-batch length of 0 when there are no batches at all, instead of
    raising ``IndexError`` on ``batch[0]``.
    """
    print("----------------- Iterator",batch_size)

    # Filtered games
    games = batchifier.filter(dataset.get_data())
    if shuffle:
        random.shuffle(games)

    self.n_examples = len(games)
    self.batch_size = batch_size
    self.n_batches = int(math.ceil(1. * self.n_examples / self.batch_size))

    batch = split_batch(games, batch_size, use_padding)
    print("++++ Iterator | n_examples = {},batch_size={},n_batches={}".format(self.n_examples,self.batch_size,self.n_batches))
    # Guard the debug output: an empty dataset yields no first batch.
    first_batch_len = len(batch[0]) if len(batch) > 0 else 0
    print("shape({},{})".format(len(batch), first_batch_len))

    # Multi-process path: the semaphore is handed to sem_iterator, which
    # feeds the batches to the pool.
    self.semaphores = Semaphore(no_semaphore)
    it_batch = sem_iterator(l=batch, sem=self.semaphores)
    self.process_iterator = pool.imap(batchifier.apply, it_batch)
def __init__(self, name, urlnum_max, data_path=None, recordlen_min=3,
             rand_biase=0):
    """
    Set up the shared resources and dataset bookkeeping.

    :param name: string, the data file name
    :param urlnum_max: int, the max number of urls; forwarded to
        ``urlvacob.UrlVacob.__init__``
    :param data_path: the directory of the data; when None the result of
        ``self._get_default_path()`` is used instead
    :param recordlen_min: the minimal length of the url routes
    :param rand_biase: a random value to build a different dataset between
        different servers; stored as the initial data index
    """
    self.manager = Manager()
    self.lock = Lock()
    self.semaphore = Semaphore(4)
    urlvacob.UrlVacob.__init__(self, '', urlnum_max, 2)
    self._name = name
    # Identity test: `== None` can be hijacked by a custom __eq__.
    self._data_path = (self._get_default_path() if data_path is None
                       else data_path)
    self._cache_path = os.path.join(dataset.ROOT_DIR, 'data', 'cache')
    self._rcdlen_min = recordlen_min
    self._window = 3  # default
    self._data_index = rand_biase
    self._routes = []  # process shared list
def run(self, tasks, build_config, parallel_threads):
    """Build every buildable task, at most ``parallel_threads`` at a time.

    Args:
        tasks: Task collection providing ``count_buildable_tasks``,
            ``get_next``, ``count`` and ``print_name``.
        build_config: Passed through to ``start_new_process``.
        parallel_threads: Initial count of the semaphore limiting concurrent
            jobs.

    Returns:
        0 when no task is left FAILED, RUNNING or NEW, otherwise 1.
    """
    slots = Semaphore(parallel_threads)
    finished = Condition(Lock())

    def wait_for_running():
        self.wait_tasks_to_complete(parallel_threads, finished, slots)

    while tasks.count_buildable_tasks() > 0:
        task = tasks.get_next()
        if task is not None:
            slots.acquire()
            task.state = Task.State.RUNNING
            logging.debug("Starting task %s", task.name)
            self.start_new_process(finished, slots, self.process_job, task,
                                   build_config)
        else:
            # Nothing can be started right now; wait, then look again.
            wait_for_running()

    wait_for_running()

    if tasks.count(Task.State.FAILED) > 0:
        logging.error('Some packages failed to build.')
        logging.error("  %s", tasks.print_name(Task.State.FAILED))
        return 1

    leftovers = (
        (Task.State.RUNNING,
         'Something went wrong, there are still some running tasks.'),
        (Task.State.NEW,
         'Something went wrong, there are still unprocessed tasks.'),
    )
    for state, message in leftovers:
        if tasks.count(state) > 0:
            logging.error(message)
            return 1

    logging.info("Build completed successfully.")
    return 0
def _init_pool(self):
    """
    Initialise the worker pool.

    Attributes already present on the instance are kept; missing ones get
    these defaults:
        - self._process_n: max(2, cpu_count()) (process pool size)
        - self._thread_n: 64 (thread pool size per process)
        - self._daemon: False (True for daemon threads)
        - self._kill: True (kill main process on shutdown)
        - self._debug: False (debug mode)
        - self._logger: None (logger)

    The shared run flag, the two events and the semaphore are then created
    (the semaphore's single permit is taken immediately), and
    ``_maintain_pool()`` is called last.
    """
    self._pool_initialized = True

    fallbacks = (
        ('_process_n', max(2, cpu_count())),
        ('_thread_n', 64),
        ('_daemon', False),
        ('_kill', True),
        ('_debug', False),
        ('_logger', None),
    )
    for attr, fallback in fallbacks:
        setattr(self, attr, getattr(self, attr, fallback))

    self._keep_running = Value('i', 1)

    self._shutdown_event = Event()
    self._shutdown_event.clear()
    self._event = Event()

    # Created with one permit, which is taken straight away.
    self._semaphore = Semaphore(1)
    self._semaphore.acquire()

    self._closed = False
    self._maintain_pool()
def __init__(self, num_processes=1):
    """Create the sync primitives used to communicate with spawned children.

    Args:
        num_processes: Maximum number of workers; also the number of slots
            in each shared array.
    """
    self.num_processes = num_processes

    def slots(typecode, empty):
        # One shared entry per worker, all starting out "empty".
        return Array(typecode, [empty] * num_processes)

    # "Worker pool guard": caps the number of spawned workers at the
    # configured maximum (and blocks .spawn_child() beyond that).
    self._semaphore = Semaphore(num_processes)

    # One slot per worker holding the pid of that worker's child; all zeroes
    # initially. A new child writes its pid into the slot and resets the
    # slot to 0 when it finishes, freeing it for the next child.
    self._pids = slots('i', 0)

    # Also one pid slot per worker, but not reset right away when a child
    # ends. Finished Unix subprocesses only leave the process list (and free
    # their memory) once the parent has waitpid()'ed for them, so each newly
    # spawned child first waitpid()'s for the finished child that previously
    # occupied the slot. This avoids ever-growing process lists and a slowly
    # growing memory footprint.
    self._waitfor = slots('i', 0)

    # Per-worker idle flags. An idle worker (waiting for work) is safe to
    # terminate on request; once it starts processing work it flips its flag
    # and won't be terminated while still working.
    self._idle = slots('b', False)
def run():
    """Run the configured algorithm on every input file for every parameter combination.

    Reads ``parameters['algo']``, ``parameters['files']`` and
    ``parameters['params']`` (a mapping from parameter name to an iterable of
    candidate values). One configuration dict is built per element of the
    Cartesian product of the candidate values. For each file and
    configuration the file is re-parsed and a ``MyProcess`` running
    ``run_algorithm`` is started. A semaphore with 3 permits is acquired
    before each start and passed to the process.

    All input files are opened up front, as before, and are now closed again
    when the function exits, including on error.
    """
    algo = parameters['algo']
    params = parameters['params']
    max_processes = 3
    semaphore = Semaphore(max_processes)

    # Choosing all n keys out of n gives exactly one combination, so the
    # configurations are simply the product of the candidate values.
    keys = list(params)
    configs = [
        dict(zip(keys, values))
        for values in it.product(*[params[k] for k in keys])
    ]

    files = [open(x) for x in parameters['files']]
    try:
        for f in files:
            for conf in configs:
                config = {'FILENAME': f.name}
                config.update(conf)
                f.seek(0)
                num_vars, clauses = parser.parse(f)
                worker = MyProcess(target=run_algorithm,
                                   args=(algo, num_vars, clauses, config,
                                         semaphore))
                # Wait for a free slot before launching the next run.
                semaphore.acquire()
                worker.start()
    finally:
        # Workers receive parsed data, not the file objects, so the handles
        # can be released here.
        for f in files:
            f.close()
def __init__(self, dataset, batch_size, batchifier, pool, shuffle=False,
             use_padding=False, no_semaphore=20):
    """Filter the dataset, split it into batches and set up the parallel iterator.

    Args:
        dataset: Object whose ``get_data()`` returns the games.
        batch_size: Number of games per batch.
        batchifier: Provides ``filter(games)`` and ``apply(batch)``.
        pool: Pool whose ``imap`` applies ``batchifier.apply`` to each batch.
        shuffle: Shuffle the filtered games in place when true.
        use_padding: Forwarded to ``split_batch``.
        no_semaphore: Initial count of the semaphore given to ``sem_iterator``.
    """
    # Keep only the games the batchifier accepts.
    games = batchifier.filter(dataset.get_data())
    if shuffle:
        random.shuffle(games)

    self.batch_size = batch_size
    self.n_examples = len(games)
    self.n_batches = int(math.ceil(1. * self.n_examples / self.batch_size))

    batches = split_batch(games, batch_size, use_padding)

    # Multi-process path: the semaphore is handed to sem_iterator, which
    # feeds the batches to the pool.
    self.semaphores = Semaphore(no_semaphore)
    self.process_iterator = pool.imap(
        batchifier.apply, sem_iterator(l=batches, sem=self.semaphores))
def get_spanning_reads_of_aligned_pacbio_reads(self, alignment_file):
    """Check every read overlapping the reference VNTR in a separate process.

    Args:
        alignment_file: Path to the alignment; opened in text mode when the
            name ends with 'sam', in binary mode otherwise.

    Returns:
        A list copy of the shared ``mapped_spanning_reads`` list that the
        ``check_if_pacbio_read_spans_vntr`` workers were given.
    """
    sema = Semaphore(settings.CORES)
    manager = Manager()
    length_distribution = manager.list()
    mapped_spanning_reads = manager.list()

    # The fetched region is exactly the VNTR: [start, start + length).
    vntr = self.reference_vntr
    region_start = vntr.start_point
    region_end = vntr.start_point + vntr.get_length()

    if alignment_file.endswith('sam'):
        read_mode = 'r'
    else:
        read_mode = 'rb'
    samfile = pysam.AlignmentFile(alignment_file, read_mode)

    reference = get_reference_genome_of_alignment_file(samfile)
    if reference == 'HG19':
        chromosome = vntr.chromosome
    else:
        # Other references: the first three characters are dropped.
        chromosome = vntr.chromosome[3:]

    workers = []
    for read in samfile.fetch(chromosome, region_start, region_end):
        # One permit per running worker; passed along to the worker.
        sema.acquire()
        worker = Process(
            target=self.check_if_pacbio_read_spans_vntr,
            args=(sema, read, length_distribution, mapped_spanning_reads))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()

    logging.info('length_distribution of mapped spanning reads: %s' %
                 list(length_distribution))
    return list(mapped_spanning_reads)
def main():
    """Parse arguments, build the pages and render each in its own process.

    At most ``thread_count`` page processes are admitted at a time via a
    semaphore that is also passed to ``make_page``. Progress is reported
    over ``2 * len(pages)`` steps: one per page started and one per page
    joined.
    """
    tic = time.perf_counter()
    parser = make_parser()
    parse(parser.parse_args())
    get_files()
    match_files()

    pages = name_pages(list(chunks(matches, lines)))
    n_pages = len(pages)
    total_steps = n_pages * 2

    concurrency = thread_count
    print("Currently using " + str(concurrency) + " Thread(s)")
    sema = Semaphore(concurrency)

    processes = []
    for page_no, page in enumerate(pages, start=1):
        sema.acquire()
        progress(page_no, total_steps,
                 "Processing page" + str(page_no) + " of " + str(n_pages))
        process = multiprocessing.Process(target=make_page, args=(page, sema,))
        processes.append(process)
        process.start()

    # The second half of the progress bar counts joined processes.
    for done_no, process in enumerate(processes, start=1):
        progress(n_pages + done_no, total_steps,
                 "Finishing page " + str(done_no) + " of " + str(n_pages))
        process.join()

    toc = time.perf_counter()
    print(f"\nFinished merging in {toc - tic:0.4f} seconds")
    print("\nPages have been stored at ", dest)
def identify_similar_regions_for_vntrs_using_blat():
    """Run ``find_similar_region_for_vntr`` for each eligible reference VNTR.

    VNTRs that are overlapping or that have a homologous VNTR are skipped.
    Up to 24 worker processes are admitted at a time. The ids collected in
    the shared result list are appended, one per line, to
    'similar_vntrs.txt'.
    """
    from multiprocessing import Process, Semaphore, Manager

    reference_vntrs = load_unique_vntrs_data()
    sema = Semaphore(24)
    result_list = Manager().list()
    workers = []

    for index, ref_vntr in enumerate(reference_vntrs):
        eligible = (ref_vntr.is_non_overlapping()
                    and not ref_vntr.has_homologous_vntr())
        if not eligible:
            continue
        # One permit per running worker; passed along to the worker.
        sema.acquire()
        worker = Process(target=find_similar_region_for_vntr,
                         args=(sema, ref_vntr, index, result_list))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()

    with open('similar_vntrs.txt', 'a') as out:
        for vntr_id in list(result_list):
            out.write('%s\n' % vntr_id)
def serve(self):
    """Start the worker processes and block until the server is stopped.

    Sets the shared ``isRunning`` flag, starts listening on the server
    transport, forks ``numWorkers`` daemon processes running
    ``workerProcess``, waits for the ones that actually started to signal
    the semaphore, then polls ``isRunning`` once per second until it is
    cleared by ``stop()`` or a ``SystemExit``/``KeyboardInterrupt`` arrives.
    ``isRunning`` is always cleared on the way out.

    Only successfully started workers are waited for. Waiting ``numWorkers``
    times would block forever if any worker failed to start.
    """
    # Shared state that tells the workers to exit once it becomes False.
    self.isRunning.value = True

    # First bind and listen to the port.
    self.serverTransport.listen()

    # Fork the children.
    semaphore = Semaphore(0)
    started = 0
    for _ in range(self.numWorkers):
        try:
            w = Process(target=self.workerProcess, args=(semaphore, ))
            w.daemon = True
            w.start()
            started += 1
            self.workers.append(w)
        except Exception as x:
            logger.exception(x)

    # Wait until every started worker has finished initialising.
    # NOTE(review): assumes workerProcess releases the semaphore once after
    # its init -- confirm.
    for _ in range(started):
        semaphore.acquire()

    # Wait until the condition is set by stop().
    while True:
        try:
            gevent.sleep(1)
            if not self.isRunning.value:
                break
        except (SystemExit, KeyboardInterrupt):
            break
        except Exception as x:
            logger.exception(x)

    self.isRunning.value = False
def _readtimestepsbond(self):
    """Read the trajectory in parallel and write per-bond-type step/atom files.

    Each timestep is processed by ``self.bonddetector.readatombondtype`` in a
    worker pool. For every bond type in a result, the ``[step, atomids]``
    record is appended (via ``self.listtobytes``) to the file
    ``stepatom.<bondtype>`` in ``self.trajatom_dir``; the file is opened the
    first time a bond type is added to ``self.atombondtype``. The number of
    processed timesteps is stored in ``self._nstep``.
    """
    # added on 2018-12-15
    stepatomfiles = {}
    self._mkdir(self.trajatom_dir)
    with Pool(self.nproc, maxtasksperchild=10000) as pool:
        # Released once per consumed result below.
        # NOTE(review): presumably _produce acquires it per item, bounding
        # the number of outstanding timesteps to 360 -- confirm.
        semaphore = Semaphore(360)
        # Lines are paired with error entries only when an error file is
        # configured; the final 100 is imap_unordered's chunksize.
        results = pool.imap_unordered(
            self.bonddetector.readatombondtype,
            self._produce(
                semaphore,
                enumerate(
                    zip(self.lineiter(self.bonddetector), self.erroriter())
                    if self.errorfilename is not None else
                    self.lineiter(self.bonddetector)),
                (self.errorfilename is not None)), 100)
        nstep = 0
        for d, step in tqdm(results, desc="Read trajectory", unit="timestep"):
            for bondtypebytes, atomids in d.items():
                bondtype = self._bondtype(bondtypebytes)
                if bondtype not in self.atombondtype:
                    # First occurrence of this bond type: register it and
                    # open its output file.
                    self.atombondtype.append(bondtype)
                    stepatomfiles[bondtype] = open(
                        os.path.join(self.trajatom_dir,
                                     f'stepatom.{bondtype}'), 'wb')
                stepatomfiles[bondtype].write(
                    self.listtobytes([step, atomids]))
            # One result consumed: hand a permit back.
            semaphore.release()
            nstep += 1
    pool.close()
    self._nstep = nstep
    for stepatomfile in stepatomfiles.values():
        stepatomfile.close()
    pool.join()
def _get_all(embeddings, data_sequence, start_iteration, ent_type, w_size,
             batch_size, processes, evalutation_semaphore=None):
    """Yield a ``LookUpBySurfaceAndContext`` for every ``EmbedWithContext`` result.

    Args:
        embeddings, data_sequence, ent_type, w_size, batch_size, processes,
        start_iteration: Forwarded to ``EmbedWithContext.run``.
        evalutation_semaphore: Optional semaphore; when given it is acquired
            (with a 10 second timeout) before each item is yielded.

    Any ``Exception`` raised while acquiring or yielding is reported with
    the offending result and re-raised.
    """
    # The embed semaphore makes sure that EmbedWithContext does not
    # over-produce results relative to the creation of
    # LookUpBySurfaceAndContext items.
    embed_semaphore = Semaphore(100)

    link_results = EmbedWithContext.run(
        embeddings, data_sequence, ent_type, w_size, batch_size, processes,
        embed_semaphore, start_iteration=start_iteration)

    for it, link_result in enumerate(link_results):
        try:
            if evalutation_semaphore is not None:
                evalutation_semaphore.acquire(timeout=10)
            yield LookUpBySurfaceAndContext(link_result)
        except Exception as ex:
            print(type(ex))
            print("Error: ", link_result)
            raise

        # One permit is handed back every batch_size consumed results.
        if it % batch_size != 0:
            continue
        embed_semaphore.release()
def identify_similar_regions_for_vntrs_using_blat():
    """Write the VNTR structures to FASTA and search each one in parallel.

    Every reference VNTR is written to 'reference_vntr_structures.fa' as the
    last 30 characters of its left flank, its pattern and the first 30
    characters of its right flank. ``find_similar_region_for_vntr`` is then
    run once per VNTR, with up to 7 worker processes admitted at a time.
    The ids collected in the shared result list are appended, one per line,
    to 'similar_vntrs.txt'.
    """
    from multiprocessing import Process, Semaphore, Manager

    reference_vntrs = load_unique_vntrs_data()

    def to_record(ref_vntr):
        # Flank-trimmed structure: 30 bases either side of the pattern.
        record = SeqRecord.SeqRecord('')
        left = ref_vntr.left_flanking_region[-30:]
        right = ref_vntr.right_flanking_region[:30]
        record.seq = Seq.Seq(left + ref_vntr.pattern + right)
        record.id = str(ref_vntr.id)
        return record

    records = [to_record(ref_vntr) for ref_vntr in reference_vntrs]
    vntr_structures_file = 'reference_vntr_structures.fa'
    with open(vntr_structures_file, 'w') as output_handle:
        SeqIO.write(records, output_handle, 'fasta')

    sema = Semaphore(7)
    result_list = Manager().list()
    workers = []
    for ref_vntr in reference_vntrs:
        # One permit per running worker; passed along to the worker.
        sema.acquire()
        worker = Process(
            target=find_similar_region_for_vntr,
            args=(sema, ref_vntr, vntr_structures_file, result_list))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()

    with open('similar_vntrs.txt', 'a') as out:
        for vntr_id in list(result_list):
            out.write('%s\n' % vntr_id)
def _spawn(self):
    """
    Spawn one spider process per URL in ``self._urls``.

    At most ``self.max_processes`` permits are available; one is acquired
    before each process is started and the semaphore is handed to the
    spider. Each spider gets a result file in the current working directory,
    named after its domain and a timestamp.
    """
    safe_chars = string.ascii_letters + string.digits

    def create_result_file_name(dd):
        # Unique result file name: every character outside [A-Za-z0-9] is
        # replaced by "_", then a timestamp is appended.
        cleaned = "".join(c if c in safe_chars else "_" for c in dd)
        return cleaned + time.strftime("__%Y%m%d_%H%M%S.txt")

    self._sema = Semaphore(self.max_processes)

    # The spiders can crawl independently and have no common resources.
    for url in self._urls:
        domain = urlsplit(url).netloc.lower()
        result_path = os.path.join(os.getcwd(),
                                   create_result_file_name(domain))
        spider = Spider(url, domain, self.limit, self.limit_param,
                        result_path, self.max_threads, self._sema,
                        self.verbose)
        proc = Process(target=Spider.crawl, args=(spider, ))
        self._sema.acquire(True)  # acquire semaphore for next spider
        proc.start()
        if self.verbose:
            print("[+] Spawned spider for: {}".format(url))
def test_append_kill(library, mongo_host, library_name):
    """Kill an appending process at random moments and check the symbol still reads.

    One append is run to completion and timed first. Then, 100 times, an
    appender process is started, terminated after a random delay of up to
    twice that duration, and the symbol is read back.
    """
    # Start from an empty DataFrame.
    empty = DataFrame({'v': []}, [])
    empty.index.name = 'index'
    library.write('symbol', empty)

    sem = Semaphore(0)

    def run_append(end):
        appender = Appender(mongo_host, library_name, sem, 0, end)
        child = Process(target=appender.run)
        child.start()
        sem.release()
        return child

    def check_written():
        sym = library.read('symbol')
        print("Checking written %d" % len(sym.data))

    # Time how long a single append operation takes.
    started_at = datetime.now()
    run_append(1).join()
    check_written()
    time_taken = (datetime.now() - started_at).total_seconds()

    for i in range(100):
        print("Loop %d" % i)
        victim = run_append(100)
        # Let it run for a random while, then kill it forcibly.
        time.sleep(2 * (random.random() * time_taken))
        victim.terminate()
        # The data must still be readable.
        check_written()
def __init__(self, *args, **kwargs):
    """Configure the gatherer from keyword arguments.

    Keyword Args:
        url: Required start URL.
        depth: Stored as ``max_depth`` (default 1).
        workers: Pool size and semaphore count (default 1).
        acceptable_errors: Stored as ``max_errors`` (default None).
        out: Base output directory (default "/tmp/"); "url_gather/" is
            appended and the directory is created when missing.
        collector_file, collector_class: Stored before
            ``self._load_collector()`` is called.

    Raises:
        Exception: when no ``url`` is given or it is empty.
    """
    option = kwargs.get

    self.url = option("url")
    if not self.url:
        raise Exception("No URL to gather")

    self.max_depth = option("depth", 1)
    self.workers = option("workers", 1)
    self.max_errors = option("acceptable_errors", None)

    # Output always goes to "<out>/url_gather/".
    self.out = option("out", "/tmp/")
    if not self.out.endswith("/"):
        self.out += "/"
    self.out += "url_gather/"
    if not os.path.exists(self.out):
        os.makedirs(self.out)

    self.collector_file = option("collector_file")
    self.collector_class = option("collector_class")
    self._load_collector()

    self._gathered_urls = set()

    # Multiprocessing resources.
    self._pool = Pool(self.workers)
    self._semaphore = Semaphore(self.workers)
    self._manager = Manager()
    self._url_children = self._manager.dict()
    self._url_errors = self._manager.dict()
    self._url_events = {}