def _create_subcorpus(self, request):
    """
    Create a new subcorpus based on values submitted via request.form.

    The selection is taken from the first of the following which is
    present: a raw CQL condition ('cql'), a JSON-encoded structured
    "within" condition ('within_json'), or a text type selection.
    A text type selection combined with aligned corpora is resolved
    via the LIVE_ATTRIBUTES plug-in (if it exists).

    req. arguments:
    subcname -- name of new subcorpus
    cql -- custom within condition
    within_json -- JSON-encoded custom within condition
    aligned_corpora -- list of aligned corpora
    attrs -- JSON-encoded attribute map (used with aligned corpora only)
    publish -- integer flag; a missing or empty value means "do not publish"
    description -- a description passed along when publishing

    returns:
    a dict with a single key 'unfinished_subc' containing serialized
    unfinished asynchronous tasks of the subcorpus category

    raises:
    UserActionException -- no subcorpus name or no selection specified
    FunctionNotSupported -- aligned corpora requested but the corpus
                            has no label_attr/id_attr metadata
    SubcorpusError -- the synchronous calculation returned False
    RuntimeError -- the configured calc_backend is not recognized
    """
    subcname = request.form['subcname']
    within_json = request.form.get('within_json')
    raw_cql = request.form.get('cql')
    aligned_corpora = request.form.getlist('aligned_corpora')
    # int(None) would raise TypeError in case the field is not submitted at all
    publish = bool(int(request.form.get('publish') or 0))
    corpus_info = self.get_corpus_info(self.args.corpname)
    description = request.form.get('description')

    if raw_cql:
        aligned_corpora = []
        tt_query = ()
        within_cql = raw_cql
        full_cql = 'aword,[] %s' % raw_cql
    elif within_json:  # user entered a subcorpus query manually
        aligned_corpora = []
        tt_query = ()
        within_cql = self._deserialize_custom_within(json.loads(within_json))
        full_cql = 'aword,[] %s' % within_cql
    else:
        within_cql = None
        if aligned_corpora and plugins.runtime.LIVE_ATTRIBUTES.exists:
            if not (corpus_info.metadata.label_attr and corpus_info.metadata.id_attr):
                raise FunctionNotSupported(
                    'Corpus must have a bibliography item defined to support this function')
            attrs = json.loads(request.form.get('attrs', '{}'))
            sel_match = plugins.runtime.LIVE_ATTRIBUTES.instance.get_attr_values(
                self._plugin_api, corpus=self.corp, attr_map=attrs,
                aligned_corpora=aligned_corpora, limit_lists=False)
            values = sel_match['attr_values'][corpus_info.metadata.label_attr]
            # the matching items are passed to the collector under the id attribute
            args = argmapping.Args()
            setattr(args, 'sca_{0}'.format(corpus_info.metadata.id_attr),
                    [v[1] for v in values])
            tt_query = TextTypeCollector(self.corp, args).get_query()
        else:
            tt_query = TextTypeCollector(self.corp, request).get_query()
        full_cql = 'aword,[] within %s' % ' within '.join(
            ['<%s %s />' % item for item in tt_query])
        full_cql = import_string(full_cql, from_encoding=self.corp_encoding)
    imp_cql = (full_cql,)

    basecorpname = self.args.corpname.split(':')[0]
    if not subcname:
        raise UserActionException(_('No subcorpus name specified!'))
    path = self.prepare_subc_path(basecorpname, subcname, publish=False)
    publish_path = self.prepare_subc_path(
        basecorpname, subcname, publish=True) if publish else None

    if isinstance(path, unicode):
        path = path.encode('utf-8')

    if len(tt_query) == 1 and not aligned_corpora:
        # a single structure condition is calculated synchronously
        result = corplib.create_subcorpus(path, self.corp, tt_query[0][0], tt_query[0][1])
        if result and publish_path:
            corplib.mk_publish_links(path, publish_path, description)
    elif len(tt_query) > 1 or within_cql or aligned_corpora:
        backend, conf = settings.get_full('global', 'calc_backend')
        if backend == 'celery':
            import task
            app = task.get_celery_app(conf['conf'])
            res = app.send_task(
                'worker.create_subcorpus',
                (self.session_get('user', 'id'), self.args.corpname, path, publish_path,
                 tt_query, imp_cql, description),
                time_limit=TASK_TIME_LIMIT)
            self._store_async_task(AsyncTaskStatus(
                status=res.status, ident=res.id,
                category=AsyncTaskStatus.CATEGORY_SUBCORPUS,
                label=u'%s:%s' % (basecorpname, subcname),
                args=dict(subcname=subcname, corpname=basecorpname)))
            result = {}
        elif backend == 'multiprocessing':
            from bgcalc import subc_calc
            import functools
            import multiprocessing
            worker = subc_calc.CreateSubcorpusTask(
                user_id=self.session_get('user', 'id'), corpus_id=self.args.corpname)
            multiprocessing.Process(target=functools.partial(
                worker.run, tt_query, imp_cql, path, publish_path, description)).start()
            result = {}
        else:
            # without this branch 'result' stays unassigned and the function
            # fails below with a misleading UnboundLocalError
            raise RuntimeError('Unknown calculation backend: %s' % (backend,))
    else:
        raise UserActionException(_('Nothing specified!'))

    if result is False:
        raise SubcorpusError(_('Empty subcorpus!'))

    with plugins.runtime.SUBC_RESTORE as sr:
        try:
            # only the part following the initial '[]' is stored
            sr.store_query(user_id=self.session_get('user', 'id'),
                           corpname=self.args.corpname,
                           subcname=subcname,
                           cql=full_cql.strip().split('[]', 1)[-1])
        except Exception as e:
            # a failed backup must not break the (already created) subcorpus
            logging.getLogger(__name__).warning('Failed to store subcorpus query: %s', e)
            self.add_system_message(
                'warning',
                _('Subcorpus created but there was a problem saving a backup copy.'))
    unfinished_corpora = [
        at for at in self.get_async_tasks(category=AsyncTaskStatus.CATEGORY_SUBCORPUS)
        if not at.is_finished()]
    return dict(unfinished_subc=[uc.to_dict() for uc in unfinished_corpora])
def _create_subcorpus(self, request):
    """
    Create a new subcorpus according to a JSON request body.

    req. arguments (request.json):
    form_type -- selects how the subcorpus is specified:
                 'tt-sel' -- a text type selection (CreateSubcorpusArgs)
                 'within' -- a structured within condition
                             (CreateSubcorpusWithinArgs)
                 'cql' -- a raw CQL within condition
                          (CreateSubcorpusRawCQLArgs)
    the remaining values are parsed by the respective *Args class

    returns:
    a dict with a single key 'processed_subc' containing serialized
    unfinished asynchronous tasks of the subcorpus category

    raises:
    UserActionException -- unknown form type, missing subcorpus name,
                           publishing without a description or
                           an empty selection
    FunctionNotSupported -- the LIVE_ATTRIBUTES plug-in exists but the
                            corpus has no label_attr/id_attr metadata
    SubcorpusError -- the synchronous calculation returned False
    """
    form_type = request.json['form_type']
    within_cql = None

    if form_type == 'tt-sel':
        data = CreateSubcorpusArgs.from_dict(request.json)
        corpus_info = self.get_corpus_info(data.corpname)
        # NOTE(review): the plug-in branch is taken whenever the plug-in
        # exists, even with no aligned corpora selected - confirm intended
        if not plugins.runtime.LIVE_ATTRIBUTES.exists:
            selection = data.text_types
        elif corpus_info.metadata.label_attr and corpus_info.metadata.id_attr:
            matched = plugins.runtime.LIVE_ATTRIBUTES.instance.get_attr_values(
                self._plugin_api,
                corpus=self.corp,
                attr_map=data.text_types,
                aligned_corpora=data.aligned_corpora,
                limit_lists=False)
            selection = {}
            for attr, items in matched.get('attr_values', {}).items():
                # values of the label attribute are stored under the id attribute
                target = (corpus_info.metadata.id_attr
                          if attr == corpus_info.metadata.label_attr else attr)
                # keys without a dot are skipped
                if '.' in target:
                    selection[target] = [item[1] for item in items]
        else:
            raise FunctionNotSupported(
                'Corpus must have a bibliography item defined to support this function')
        tt_query = TextTypeCollector(self.corp, selection).get_query()
        conditions = ' within '.join(['<%s %s />' % item for item in tt_query])
        full_cql = 'aword,[] within %s' % conditions
    elif form_type == 'within':
        data = CreateSubcorpusWithinArgs.from_dict(request.json)
        tt_query = ()
        within_cql = self._deserialize_custom_within(data.within)
        full_cql = 'aword,[] %s' % within_cql
    elif form_type == 'cql':
        data = CreateSubcorpusRawCQLArgs.from_dict(request.json)
        tt_query = ()
        within_cql = data.cql
        full_cql = f'aword,[] {data.cql}'
    else:
        raise UserActionException(f'Invalid form type provided - "{form_type}"')
    imp_cql = (full_cql,)

    if not data.subcname:
        raise UserActionException(translate('No subcorpus name specified!'))
    if data.publish and not data.description:
        raise UserActionException(translate('No description specified'))

    basecorpname = self.args.corpname.split(':')[0]
    path = self.prepare_subc_path(basecorpname, data.subcname, publish=False)
    if data.publish:
        publish_path = self.prepare_subc_path(basecorpname, data.subcname, publish=True)
    else:
        publish_path = None

    if len(tt_query) == 1 and not data.has_aligned_corpora():
        # a single structure condition is calculated synchronously
        struct_name, struct_query = tt_query[0][0], tt_query[0][1]
        result = corplib.create_subcorpus(path, self.corp, struct_name, struct_query)
        if result and publish_path:
            corplib.mk_publish_links(
                path, publish_path, self.session_get('user', 'fullname'), data.description)
    elif len(tt_query) > 1 or within_cql or data.has_aligned_corpora():
        # anything else is handed over to the background calculation backend
        backend_client = bgcalc.calc_backend_client(settings)
        task_args = (
            self.session_get('user', 'id'), self.args.corpname, path, publish_path,
            tt_query, imp_cql, self.session_get('user', 'fullname'), data.description)
        submitted = backend_client.send_task(
            'create_subcorpus', task_args, time_limit=TASK_TIME_LIMIT)
        self._store_async_task(AsyncTaskStatus(
            status=submitted.status,
            ident=submitted.id,
            category=AsyncTaskStatus.CATEGORY_SUBCORPUS,
            label=f'{basecorpname}:{data.subcname}',
            args={'subcname': data.subcname, 'corpname': basecorpname}))
        result = {}
    else:
        raise UserActionException(translate('Nothing specified!'))

    if result is False:
        raise SubcorpusError(translate('Empty subcorpus!'))

    with plugins.runtime.SUBC_RESTORE as restore:
        try:
            # only the part following the initial '[]' is stored
            stored_cql = full_cql.strip().split('[]', 1)[-1]
            restore.store_query(
                user_id=self.session_get('user', 'id'),
                corpname=self.args.corpname,
                subcname=data.subcname,
                cql=stored_cql)
        except Exception as ex:
            # a failed backup is reported but does not abort the action
            logging.getLogger(__name__).warning(
                'Failed to store subcorpus query: %s' % ex)
            self.add_system_message(
                'warning',
                translate('Subcorpus created but there was a problem saving a backup copy.'))

    pending = []
    for async_task in self.get_async_tasks(category=AsyncTaskStatus.CATEGORY_SUBCORPUS):
        if not async_task.is_finished():
            pending.append(async_task.to_dict())
    return {'processed_subc': pending}