def conc_register(self, user_id, corpus_id, subc_name, subchash, query, samplesize, time_limit):
    """
    Register a concordance calculation and, unless the registration
    reports that one is already running, dispatch the calculation task.

    arguments:
    user_id -- an identifier of the user who entered the query (used to
               build the user's sub-corpus directory path)
    corpus_id -- a corpus identifier
    subc_name -- a sub-corpus identifier (None if not used)
    subchash -- a MD5 checksum of the sub-corpus data file
    query -- a query tuple
    samplesize -- a row number limit (if 0 then unlimited - see Manatee API)
    time_limit -- a time limit (in seconds) for the main conc. task; it is
                  passed as `soft_time_limit` when the task is sent

    returns:
    the value produced by the registration call, described as
    a dict(cachefile=..., pidfile=..., stored_pidfile=...); this function
    also reads its 'already_running' key
    """
    register = concworker.TaskRegistration(task_id=self.request.id)

    # Two candidate sub-corpus locations: the user's own directory and
    # the shared 'published' one, both below the configured root.
    user_subc_dir = os.path.join(settings.get('corpora', 'users_subcpath'), str(user_id))
    published_subc_dir = os.path.join(settings.get('corpora', 'users_subcpath'), 'published')
    subc_dirs = (user_subc_dir, published_subc_dir)

    initial_args = register(corpus_id, subc_name, subchash, subc_dirs, query, samplesize)
    if initial_args['already_running']:
        # Somebody else has already started this calculation.
        return initial_args

    # We are the first to request this calculation, so start it.
    calc_args = (initial_args, user_id, corpus_id, subc_name, subchash, query, samplesize)
    app.send_task('worker.conc_calculate', args=calc_args, soft_time_limit=time_limit)
    return initial_args
def create_task(user_id, corp, subchash, q, samplesize):
    """
    Register a concordance calculation under a freshly generated task ID
    and build the object that will perform it.

    arguments:
    user_id -- an identifier of the user (used to build the sub-corpus path)
    corp -- a corpus object; its `corpname` attribute is read and
            `subcname` is used when present (None otherwise)
    subchash -- passed through to the registration and calculation calls
    q -- the query, passed through unchanged
    samplesize -- passed through to the registration and calculation calls

    returns:
    a Process targeting the calculation when the registration does not
    report the calculation as already running, otherwise an EmptyTask
    instance
    """
    task_id = str(uuid.uuid1())
    register = concworker.TaskRegistration(task_id=task_id)
    corpus_id = corp.corpname
    subcname = getattr(corp, 'subcname', None)
    # NOTE(review): no sub-corpus path is passed to the registration call
    # here - confirm this matches the TaskRegistration call signature.
    initial_args = register(corpus_id, subcname, subchash, q, samplesize)

    if initial_args['already_running']:
        # The calculation has already been started elsewhere.
        return EmptyTask()

    def run():
        # We are the first to request this calculation; the cache plug-in
        # is forked for use by the calculation object.
        with plugins.runtime.CONC_CACHE as conc_cache:
            calculation = concworker.ConcCalculation(
                task_id=task_id, cache_factory=conc_cache.fork())
            subc_path = '%s/%s' % (settings.get('corpora', 'users_subcpath'), user_id)
            return calculation(
                initial_args, subc_path, corpus_id, subcname, subchash, q, samplesize)

    return Process(target=run)
def conc_register(self, user_id, corpus_id, subc_name, subchash, query, samplesize):
    """
    Register a concordance calculation and, unless the registration
    reports that one is already running, start the calculation task.

    arguments:
    user_id -- an identifier of the user who entered the query (used to
               build the user's sub-corpus directory path)
    corpus_id -- a corpus identifier
    subc_name -- a sub-corpus identifier (None if not used)
    subchash -- a MD5 checksum of the sub-corpus data file
    query -- a query tuple
    samplesize -- a row number limit (if 0 then unlimited - see Manatee API)

    returns:
    the value produced by the registration call, described as
    a dict(cachefile=..., pidfile=..., stored_pidfile=...); this function
    also reads its 'already_running' key
    """
    register = concworker.TaskRegistration(task_id=self.request.id)
    subc_root = settings.get('corpora', 'users_subcpath')
    subc_path = '%s/%s' % (subc_root, user_id)

    initial_args = register(corpus_id, subc_name, subchash, subc_path, query, samplesize)
    if initial_args['already_running']:
        # Somebody else has already started this calculation.
        return initial_args

    # We are the first to request this calculation, so start it.
    conc_calculate.delay(
        initial_args, user_id, corpus_id, subc_name, subchash, query, samplesize)
    return initial_args