async def arbiter_last_task():
    """Build the TF-IDF model from the bag-of-words corpus and persist it."""
    print(f"{get_actor().name}: Tworzę model tfidf")
    dictionary = corpora.Dictionary.load(config.dictionary_path)
    bow_stream = nlp_utils.doc_gen(config.bow_corpus_path)
    model = models.TfidfModel(corpus=bow_stream, id2word=dictionary)
    model.save(config.tfidf_model_path)
    print(f"{get_actor().name}: Model tfidf utworzony")
async def download_homepages(request, message):
    """Download every URL in *message*, spawn one extractor actor per page,
    dispatch the raw HTML to the extractors, then stop them."""
    print("DOWNLOAD", message)
    current_actor = get_actor()
    request.actor.logger.info("Download homepages: " + str(current_actor) + " Urls: " + str(message))
    pages = []
    for url in message:
        with urllib.request.urlopen(url) as conn:
            pages.append([conn.read()])
    # One extractor actor per downloaded page, named after this actor.
    extractors = [
        spawn(name=current_actor.name + "_extractor_" + str(i))
        for i in range(len(pages))
    ]
    for extractor in extractors:
        await extractor
    # Fan the pages out first, then await every extraction.
    pending = [send(actor, 'extract_data', page)
               for actor, page in zip(extractors, pages)]
    for action in pending:
        await action
    for extractor in extractors:
        await send(extractor, 'stop')
    return get_actor().name
async def work(name, docs):
    """Convert tokenised documents into bag-of-words vectors and pickle them."""
    actor = get_actor()
    print(actor.name + ': Przetwarzam plik ' + name)
    bows = [actor.extra['dict'].doc2bow(doc) for doc in docs]
    with open(config.bow_corpus_path + name, 'wb') as out:
        pickle.dump(bows, out)
    await asyncio.sleep(config.middle_task_wait)
async def work(name, docs):
    """Map bag-of-words documents through the TF-IDF model and pickle the result."""
    actor = get_actor()
    print(actor.name + ': Przetwarzam plik ' + name)
    weighted = [actor.extra['tfidf'][doc] for doc in docs]
    with open(config.tfidf_corpus_path + name, 'wb') as out:
        pickle.dump(weighted, out)
    await asyncio.sleep(config.middle_task_wait)
async def arbiter_last_task():
    """Build the gensim dictionary from the tokenised corpus and save it."""
    print(f"{get_actor().name}: Zaczynam tworzenie słownika")
    token_stream = nlp_utils.doc_gen(config.tokenized_corpus_path)
    dictionary = corpora.Dictionary(token_stream)
    # Drop tokens appearing in more than half the documents; keep all others.
    dictionary.filter_extremes(no_below=0, no_above=0.5, keep_n=None)
    dictionary.compactify()
    dictionary.save(config.dictionary_path)
    print(f"{get_actor().name}: Tworzenie słownika zakończone!")
async def arbiter_last_task():
    """Build the LSI model from the TF-IDF corpus and persist it."""
    print(f"{get_actor().name}: Tworzę model lsi")
    dictionary = corpora.Dictionary.load(config.dictionary_path)
    tfidf_stream = nlp_utils.doc_gen(config.tfidf_corpus_path)
    model = models.LsiModel(corpus=tfidf_stream,
                            id2word=dictionary,
                            num_topics=config.lsi_topics)
    model.save(config.lsi_model_path)
    print(f"{get_actor().name}: Model lsi utworzony")
def work_gen(query):
    """Yield one `work` task per saved similarity index, with *query*
    projected into the LSI space."""
    actor = get_actor()
    # Pipeline: markdown -> tokens -> BoW -> TF-IDF -> LSI vector.
    vec = nlp_utils.extract_tokens_from_markdown(query)
    vec = actor.extra['dict'].doc2bow(vec)
    vec = actor.extra['tfidf'][vec]
    vec = actor.extra['lsi'][vec]
    for name, index in nlp_utils.page_gen(config.index_path):
        yield partial(work, index, name, vec)
async def arbiter_last_task():
    """Sort the collected search hits, print the best ones and pickle them."""
    print(get_actor().name + ': Porządkuję wyniki')
    # Highest score first; sorted() with reverse=True keeps ties stable.
    top = sorted(get_actor().extra['results'],
                 key=lambda hit: hit['score'],
                 reverse=True)[:config.results_count]
    print('\n\n')
    for hit in top:
        print(hit)
    print('\n\n')
    with open(config.search_result_path, 'wb') as out:
        pickle.dump(top, out)
    print(get_actor().name + ': Wyniki zapisano do ' + config.search_result_path)
async def work(name, docs):
    """Project documents into LSI space, pickle the corpus chunk and build
    its similarity index on disk."""
    actor = get_actor()
    print(actor.name + ': Przetwarzam plik ' + name)
    lsi_docs = [actor.extra['lsi'][doc] for doc in docs]
    with open(config.lsi_corpus_path + name, 'wb') as out:
        pickle.dump(lsi_docs, out)
    index = similarities.MatrixSimilarity(lsi_docs)
    index_name = 'index' + nlp_utils.get_id_from_name(name) + '.idx'
    index.save(config.index_path + index_name)
    await asyncio.sleep(config.middle_task_wait)
def test_registered(self):
    '''Test the arbiter in its process domain'''
    arbiter = get_actor()
    self.assertTrue(arbiter.is_arbiter())
    self.assertTrue(arbiter.registered)
    # assertIn gives a clearer failure message than assertTrue(x in y).
    self.assertIn('arbiter', arbiter.registered)
    self.assertIn('test', arbiter.registered)
def test_arbiter_mailbox(self):
    """Neither the arbiter mailbox nor any monitor mailbox exposes
    a ``request`` attribute."""
    arbiter = get_actor()
    self.assertFalse(hasattr(arbiter.mailbox, 'request'))
    # Apply the same check to every monitor mailbox.
    for monitor in arbiter.monitors.values():
        self.assertFalse(hasattr(monitor.mailbox, 'request'))
async def testMeta(self):
    """Check basic metadata of the application and its running monitor."""
    app = await get_application(self.app_cfg.name)
    self.assertEqual(app.name, self.app_cfg.name)
    arbiter = get_actor()
    monitor = arbiter.get_actor(app.name)
    self.assertTrue(monitor.is_running())
    self.assertEqual(app, monitor.app)
    # str() of an application is its name.
    self.assertEqual(str(app), app.name)
    self.assertEqual(app.cfg.bind, '127.0.0.1:0')
async def work(index, name, query):
    """Score *query* against one similarity index and merge the hits into
    the actor-wide top-N result list kept in ``extra['result']``."""
    actor = get_actor()
    print(actor.name + ': Przetwarzam plik ' + name)
    page_id = int(nlp_utils.get_id_from_name(name))
    page = actor.extra['gh'].get_page(page_id)
    hits = [{'score': score,
             'name': page[pos]['name'],
             'url': page[pos]['html_url']}
            for pos, score in enumerate(index[query])]
    # Merge with what earlier workers found and keep only the best N.
    merged = sorted(hits + actor.extra['result'],
                    key=lambda hit: hit['score'],
                    reverse=True)
    actor.extra['result'] = merged[:config.results_count]
    await asyncio.sleep(config.middle_task_wait)
def testFunctionFromConfigFile(self):
    """A hook imported from a config module works and survives pickling."""
    worker = get_actor()
    cfg = config()
    # Unset hook: invoking it returns None.
    # assertIsNone beats assertEqual(..., None) for readability/messages.
    self.assertIsNone(cfg.connection_made(worker))
    module_name = 'tests.utils'
    self.assertEqual(cfg.import_from_module(module_name)[0], ('foo', 5))
    # After importing the module the hook echoes its argument back.
    self.assertEqual(cfg.connection_made(worker), worker)
    cfg1 = pickle.loads(pickle.dumps(cfg))
    self.assertEqual(cfg1.connection_made(worker), worker)
def testFunction(self):
    """Setting the ``post_fork`` hook takes effect and survives pickling."""
    cfg = config()
    worker = get_actor()
    self.assertTrue(cfg.post_fork)
    # Default hook is a no-op returning None; assertIsNone is the
    # idiomatic check (clearer failure than assertEqual(..., None)).
    self.assertIsNone(cfg.post_fork(worker))
    cfg.set('post_fork', post_fork)
    self.assertEqual(cfg.post_fork(worker), worker)
    cfg1 = pickle.loads(pickle.dumps(cfg))
    self.assertEqual(cfg1.post_fork(worker), worker)
async def work(arg, gh):
    """Fetch page *arg* of repositories, tokenise each README and pickle
    the resulting token lists."""
    repos = gh.get_page(arg)  # renamed from `json` to avoid shadowing the stdlib module name
    tokens = []
    for repo in repos:
        print(get_actor().name + ': Przetwarzam ' + repo['name'])
        tokens.append(nlp_utils.extract_tokens_from_markdown(repo['readme']))
    await asyncio.sleep(config.middle_task_wait)
    filename = 'corpus' + str(arg) + '.crp'
    with open(config.tokenized_corpus_path + filename, 'wb') as out:
        pickle.dump(tokens, out)
def dont_run_with_thread(obj):
    '''Decorator for disabling process based test cases when the test
    suite runs in threading, rather than processing, mode.
    '''
    actor = get_actor()
    # Outside an actor context there is nothing to check: leave untouched.
    if not actor:
        return obj
    skip = unittest.skipUnless(actor.cfg.concurrency == 'process',
                               'Run only when concurrency is process')
    return skip(obj)
def test_arbiter_object(self):
    '''Test the arbiter in its process domain'''
    arbiter = get_actor()
    self.assertEqual(arbiter, get_arbiter())
    self.assertTrue(arbiter.is_arbiter())
    self.assertEqual(arbiter.concurrency.kind, 'arbiter')
    self.assertEqual(arbiter.aid, 'arbiter')
    self.assertEqual(arbiter.name, 'arbiter')
    self.assertTrue(arbiter.monitors)
    # assertIsNone / assertIn give clearer failure messages than
    # assertEqual(..., None) / assertTrue(... in ...).
    self.assertIsNone(arbiter.exit_code)
    info = arbiter.info()
    self.assertIn('server', info)
    server = info['server']
    self.assertEqual(server['state'], 'running')
async def test_simple_spawn(self):
    '''Test start and stop for a standard actor on the arbiter domain.'''
    proxy = await self.spawn_actor(
        name='simple-actor-on-%s' % self.concurrency)
    arbiter = get_actor()
    proxy_monitor = arbiter.get_actor(proxy.aid)
    self.assertEqual(proxy_monitor, proxy)
    self.assertEqual(await send(proxy, 'ping'), 'pong')
    self.assertEqual(await send(proxy.proxy, 'echo', 'Hello!'), 'Hello!')
    # Stop via ActorTestMixin.stop_actors: tearDown is invoked on the
    # test-worker domain while this coroutine runs on the arbiter domain.
    await self.stop_actors(proxy)
    self.assertFalse(await async_while(3, proxy_monitor.is_alive))
async def test_actor_timeout(self):
    """Test a bogus actor for timeout"""
    arbiter = get_actor()
    self.assertTrue(arbiter.is_arbiter())
    name = 'bogus-timeout-%s' % self.concurrency
    proxy = await self.spawn_actor(name=name, timeout=1)
    self.assertEqual(proxy.name, name)
    # assertIn/assertNotIn report the container on failure.
    self.assertIn(proxy.aid, arbiter.managed_actors)
    proxy = arbiter.managed_actors[proxy.aid]
    await send(proxy, 'run', cause_timeout)
    # The arbiter should soon start to stop the actor
    await wait_for_stop(self, proxy.aid, True)
    #
    self.assertTrue(proxy.stopping_start)
    self.assertNotIn(proxy.aid, arbiter.managed_actors)
async def test_terminate(self):
    """Spawn a bogus actor and check that the arbiter terminates it."""
    arbiter = get_actor()
    self.assertTrue(arbiter.is_arbiter())
    name = 'bogus-term-%s' % self.concurrency
    proxy = await self.spawn_actor(name=name, timeout=1)
    self.assertEqual(proxy.name, name)
    # assertIn reports the container on failure.
    self.assertIn(proxy.aid, arbiter.managed_actors)
    proxy = arbiter.managed_actors[proxy.aid]
    #
    await send(proxy, 'run', cause_terminate)
    #
    # The arbiter should now terminate the actor
    await wait_for_stop(self, proxy.aid, True)
    #
    self.assertTrue(proxy.stopping_start)
async def parse_homepages_from_wiki_lvl1(request, message):
    """Read the slice [message[0], message[1]) of the wikicfp dump, extract
    conference links, append them to the homepages file, then fan the links
    out in chunks to freshly spawned downloader actors.

    Fixes: files are now opened with context managers (the originals were
    opened and closed manually, leaking on error), ``== None`` is replaced
    by the correct ``is None``, and the ``range(len(...))`` index loop is
    replaced by direct iteration.
    """
    print("PARSE_FROM_WIKI", message)
    current_actor = get_actor()
    request.actor.logger.info("Actor: " + str(current_actor) + " Indexes: " + str(message))
    # Context manager guarantees the dump file is closed even on error.
    with open(WIKICFP_FILENAME, "r") as wikicfp_file:
        wikicfp_file_lines = wikicfp_file.readlines()[message[0]: message[1]]
    conference_links = []
    for line in wikicfp_file_lines:
        stripped_line = line.strip()
        print("Stripped line:", stripped_line)
        conference_link = find_conf_link_one(stripped_line)
        print("conferecne link", conference_link)
        if conference_link is None:
            continue
        conference_links.append(conference_link)
    with open(CONFERENCES_HOMEPAGES_FILENAME, 'a') as f:
        for link in conference_links:
            f.write(link + '\n')
    print(conference_links)
    conference_links_chunks = split_to_chunks(conference_links, 4)
    spawned_actors = []
    for i, links_chunk in enumerate(conference_links_chunks):
        spawned_actors.append(spawn(name=current_actor.name + "_" + str(i)))
    for actor in spawned_actors:
        await actor
    sent_actions = []
    for actor, chunk in zip(spawned_actors, conference_links_chunks):
        sent_actions.append(send(actor, 'download_homepages', chunk))
    for action in sent_actions:
        result = await action
        print(result)
    for actor in spawned_actors:
        await send(actor, 'stop')
    return
async def __call__(self):
    """Return True when the configured '<name>_server' store answers a ping,
    False otherwise (missing address, bad address, or unreachable store)."""
    addr = get_actor().cfg.get('%s_server' % self.name)
    if not addr:
        return False
    # Prepend the scheme when the address was configured without one.
    scheme = '%s://' % self.name
    if scheme not in addr:
        addr = '%s://%s' % (self.name, addr)
    try:
        sync_store = create_store(addr)
    except ImproperlyConfigured:
        return False
    try:
        await sync_store.ping()
    except Exception:
        return False
    return True
async def test_start(self):
    """Starting the app registers two monitors ('pluto', 'rpc_pluto')."""
    arbiter = get_actor()
    app = self.create(name='pluto')
    self.assertTrue(app)
    # Neither monitor exists before the app starts.
    self.assertFalse(arbiter.get_actor('pluto'))
    self.assertFalse(arbiter.get_actor('rpc_pluto'))
    # create the application
    await app.start()
    monitors = []
    for monitor_name in ('pluto', 'rpc_pluto'):
        monitor = arbiter.get_actor(monitor_name)
        self.assertTrue(monitor)
        self.assertTrue(monitor.is_monitor())
        monitors.append(monitor)
    await asyncio.sleep(2)
    for monitor in monitors:
        await monitor.stop()
def __call__(self, argv, start=True, get_app=False):
    # Run this command: build the WSGI server app from argv and,
    # optionally, start it under the pulsar arbiter.
    # :param argv: command-line arguments forwarded to the pulsar app
    # :param start: when True, actually start the server/arbiter
    # :param get_app: when True (and start is False) return the app
    #                 instead of the server
    self.app.callable.command = self.name
    app = self.app
    server = self.pulsar_app(argv, self.wsgiApp, server_software=app.config['SERVER_NAME'])
    if server.cfg.nominify:
        # --nominify flag: serve unminified media assets.
        app.params['MINIFIED_MEDIA'] = False
    if start and not server.logger:  # pragma    nocover
        # Only start the arbiter when not already inside an actor context.
        if not get_actor():
            clear_logger()
        app._started = server()
        app.fire_event('on_start', data=server)
        arbiter().start()
    if not start:
        return app if get_app else server
async def test_spawning_in_arbiter(self):
    """Spawn an actor directly on the arbiter domain, then stop it."""
    arbiter = get_actor()
    self.assertEqual(arbiter.name, 'arbiter')
    self.assertTrue(len(arbiter.monitors) >= 1)
    name = 'testSpawning-%s' % self.concurrency
    future = spawn(name=name, concurrency=self.concurrency)
    # assertIn/assertIsNone give clearer failure messages than the
    # assertTrue(... in ...) / assertEqual(..., None) equivalents.
    self.assertIn(future.aid, arbiter.managed_actors)
    proxy = await future
    self.assertEqual(future.aid, proxy.aid)
    self.assertEqual(proxy.name, name)
    self.assertIn(proxy.aid, arbiter.managed_actors)
    self.assertEqual(proxy, arbiter.get_actor(proxy.aid))
    #
    await asyncio.sleep(1)
    # Stop the actor
    result = await send(proxy, 'stop')
    self.assertIsNone(result)
    #
    result = await wait_for_stop(self, proxy.aid)
    self.assertIsNone(result)
async def test_config_command(self):
    """Exercise the 'config' actor command: bad action, get, and set."""
    proxy = await self.spawn_actor(
        name='actor-test-config-%s' % self.concurrency)
    arbiter = get_actor()
    proxy_monitor = arbiter.get_actor(proxy.aid)
    # Unknown action -> None (assertIsNone beats assertEqual(..., None)).
    result = await send(proxy, 'config', 'khjkh', 'name')
    self.assertIsNone(result)
    result = await send(proxy, 'config', 'get', 'concurrency')
    self.assertEqual(result, self.concurrency)
    # 'get' with extra arguments is rejected.
    result = await send(proxy, 'config', 'get', 'concurrency', 'foo')
    self.assertIsNone(result)
    #
    # 'set' with too many arguments is rejected.
    result = await send(proxy, 'config', 'set', 'max_requests', 1000, 1000)
    self.assertIsNone(result)
    result = await send(proxy, 'config', 'set', 'max_requests', 1000)
    self.assertEqual(result, True)
    result = await send(proxy, 'config', 'get', 'max_requests')
    self.assertEqual(result, 1000)
    #
    await self.stop_actors(proxy)
    is_alive = await async_while(3, proxy_monitor.is_alive)
    self.assertFalse(is_alive)
async def test_spawn_from_actor(self):
    """An actor can itself spawn another actor."""
    proxy = await self.spawn_actor(
        name='spawning-actor-%s' % self.concurrency)
    arbiter = get_actor()
    self.assertTrue(repr(proxy).startswith('spawning-actor-'))
    self.assertEqual(proxy, proxy.proxy)
    proxy_monitor = arbiter.get_actor(proxy.aid)
    # assertEqual is clearer than the double-negative assertFalse(a != b).
    self.assertEqual(proxy, proxy_monitor)
    #
    # do the spawning
    name = 'spawned-actor-%s-from-actor' % self.concurrency
    aid = await send(proxy, 'run', spawn_actor_from_actor, name)
    self.assertTrue(aid)
    proxy_monitor2 = arbiter.get_actor(aid)
    self.assertEqual(proxy_monitor2.name, name)
    # assertNotEquals is a deprecated alias of assertNotEqual.
    self.assertNotEqual(proxy_monitor, proxy_monitor2)
    #
    # stop them
    await self.stop_actors(proxy, proxy_monitor2)
    is_alive = await async_while(3, proxy_monitor.is_alive)
    self.assertFalse(is_alive)
    is_alive = await async_while(3, proxy_monitor2.is_alive)
    self.assertFalse(is_alive)
def actor_init_task():
    """Load the saved LSI model into the current actor's extra storage."""
    me = get_actor()
    print(me.name + ': Wczytuję model lsi...')
    me.extra['lsi'] = models.LsiModel.load(config.lsi_model_path)
def test_TestSuiteMonitor(self):
    """The arbiter registers a monitor running the test suite application."""
    arbiter = get_actor()
    self.assertTrue(len(arbiter.monitors) >= 1)
    monitor = arbiter.registered['test']
    app = monitor.app
    # assertIsInstance reports the actual type on failure,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(app, TestSuite)
def test_bad_monitor(self):
    """Adding a monitor under an already-used name raises KeyError."""
    arbiter = get_actor()
    self.assertTrue(arbiter.monitors)
    # Pick the name of any existing monitor.
    taken_name = list(arbiter.monitors.values())[0].name
    self.assertRaises(KeyError, arbiter.add_monitor, taken_name)
def testMe(self):
    """system.process_info returns a dict for the current worker pid."""
    worker = get_actor()
    info = system.process_info(worker.pid)
    # Also exercise the no-argument variant (return value intentionally unused).
    system.process_info()
    # assertIsInstance reports the actual type on failure,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(info, dict)