def set_favorite_item(ctrl, request):
    """
    Create a favorite item from the submitted form and store it via
    the USER_ITEMS plugin.

    The form must provide a 'corpora' list plus 'subcorpus_id' and
    'subcorpus_orig_id'. The first corpus of the list is the main one;
    it is the only one opened together with the subcorpus and the only
    one whose search size is stored.

    returns:
    the stored item exported by its to_dict() method
    """
    form = request.form
    corpora = []
    main_size = None
    for position, corpus_id in enumerate(form.getlist('corpora')):
        is_main = position == 0
        corpus = ctrl.cm.get_Corpus(
            corpus_id, subcname=form['subcorpus_id'] if is_main else None)
        if is_main:
            main_size = corpus.search_size()
        corpus_name = l10n.import_string(
            corpus.get_conf('NAME'), corpus.get_conf('ENCODING'))
        corpora.append(dict(id=corpus_id, name=corpus_name))

    subcorpus_id = form['subcorpus_id']
    subcorpus_orig_id = form['subcorpus_orig_id']

    # the label lists all the corpora, optionally followed by the subcorpus
    label = u' || '.join(c['name'] for c in corpora)
    if subcorpus_orig_id:
        label = label + (u' / ' + subcorpus_orig_id)
    else:
        label = label + u''

    item = FavoriteItem(dict(
        name=label,
        corpora=corpora,
        subcorpus_id=subcorpus_id,
        subcorpus_orig_id=subcorpus_orig_id,
        size=main_size,
        size_info=l10n.simplify_num(main_size)
    ))
    with plugins.runtime.USER_ITEMS as uit:
        uit.add_user_item(ctrl._plugin_api, item)
    return item.to_dict()
def set_favorite_item(self, request, conc_args):
    """
    Build a user item from the submitted form and register it with
    the 'user_items' plugin for the user stored in the session.

    The main corpus (form values 'corpus_id' and 'subcorpus_id') is opened
    to obtain its search size. Each value of 'corpora[]' is attached as
    an aligned corpus.

    returns:
    a dict with the 'id' of the new item
    """
    form = request.form
    main_corp = self.cm.get_Corpus(form['corpus_id'], form['subcorpus_id'])
    corp_size = main_corp.search_size()

    data = {
        'corpora': [],
        'canonical_id': form['canonical_id'],
        'corpus_id': form['corpus_id'],
        'subcorpus_id': form['subcorpus_id'],
        'name': form['name'],
        'size': corp_size,
        'size_info': l10n.simplify_num(corp_size),
        'type': form['type']
    }
    data['corpora'].extend(
        {
            'name': aligned_id,  # TODO fetch real name??
            'corpus_id': aligned_id,
            'canonical_id': self._canonical_corpname(aligned_id),
            'type': 'corpus'
        }
        for aligned_id in form.getlist('corpora[]')
    )

    item = plugins.get('user_items').from_dict(data)
    user_id = self._session_get('user', 'id')
    plugins.get('user_items').add_user_item(user_id, item)
    return {'id': item.id}
def _export_featured(self, plugin_api):
    """
    Export all the featured corpora the current user is permitted to access.

    An item is exported when its 'id' is among the permitted corpora and
    its metadata contain a truthy 'featured' value.

    arguments:
    plugin_api -- its user_dict and user_lang attributes are read

    returns:
    a list of dicts with keys 'id', 'corpus_id', 'name', 'size',
    'size_info' and 'description'
    """
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
    featured = []
    for x in self._raw_list(plugin_api.user_lang).values():
        if x['id'] not in permitted_corpora:
            continue
        if not x['metadata'].get('featured', False):
            continue
        # a single lookup per corpus instead of one per exported attribute
        cinfo = self._manatee_corpora.get_info(x['id'])
        featured.append({
            # on client-side, this may contain also subc. id, aligned ids
            'id': x['id'],
            'corpus_id': x['id'],
            'name': cinfo.name,
            'size': cinfo.size,
            'size_info': l10n.simplify_num(cinfo.size),
            'description': self._export_untranslated_label(
                plugin_api, cinfo.description)
        })
    return featured
def set_favorite_item(ctrl, request):
    """
    Store a new favorite item described by the submitted form.

    The form must contain a 'corpora' list (its first item is the main
    corpus, opened along with the subcorpus) and 'subcorpus_id'.

    returns:
    a dict with the 'id' of the stored item (taken from item.ident)
    """
    form = request.form
    corpora = []
    main_size = None
    for idx, corpus_id in enumerate(form.getlist('corpora')):
        subcorpus = form['subcorpus_id'] if idx == 0 else None
        corpus = ctrl.cm.get_Corpus(corpus_id, subcorpus)
        if idx == 0:
            main_size = corpus.search_size()
        corpora.append({
            'id': corpus_id,
            'canonical_id': ctrl._canonical_corpname(corpus_id),
            'name': corpus.get_conf('NAME'),
        })

    subcorpus_id = form['subcorpus_id']
    # corpora names joined by ' + ', optionally followed by the subcorpus
    item_name = ' + '.join(c['name'] for c in corpora)
    if subcorpus_id:
        item_name = item_name + (' : ' + subcorpus_id)
    else:
        item_name = item_name + ''

    item = FavoriteItem({
        'name': item_name,
        'corpora': corpora,
        'subcorpus_id': form['subcorpus_id'],
        'size': main_size,
        'size_info': l10n.simplify_num(main_size),
    })
    with plugins.runtime.USER_ITEMS as uit:
        uit.add_user_item(ctrl._plugin_api, item)
    return dict(id=item.ident)
def set_favorite_item(self, request, conc_args):
    """
    Register a new item with the 'user_items' plugin.

    All the values come from request.form: the main corpus is identified
    by 'corpus_id' and 'subcorpus_id', aligned corpora are listed
    in 'corpora[]'. The size stored with the item is the search size
    of the main corpus.

    returns:
    a dict with the 'id' of the new item
    """
    form = request.form
    corp_size = self.cm.get_Corpus(
        form['corpus_id'], form['subcorpus_id']).search_size()

    data = dict([
        ('corpora', []),
        ('canonical_id', form['canonical_id']),
        ('corpus_id', form['corpus_id']),
        ('subcorpus_id', form['subcorpus_id']),
        ('name', form['name']),
        ('size', corp_size),
        ('size_info', l10n.simplify_num(corp_size)),
        ('type', form['type']),
    ])

    aligned_corpnames = form.getlist('corpora[]')
    for corpname in aligned_corpnames:
        aligned = {
            'name': corpname,  # TODO fetch real name??
            'corpus_id': corpname,
            'canonical_id': self._canonical_corpname(corpname),
            'type': 'corpus'
        }
        data['corpora'].append(aligned)

    item = plugins.get('user_items').from_dict(data)
    plugins.get('user_items').add_user_item(
        self._session_get('user', 'id'), item)
    return {'id': item.id}
def _export_featured(self, plugin_ctx):
    """
    Collect the featured corpora available to the current user.

    A corpus is exported when its id is among the permitted corpora and
    its metadata.featured attribute is truthy.

    returns:
    a list of dicts with keys 'id', 'corpus_id', 'name', 'size',
    'size_info' and 'description'
    """
    permitted_corpora = self._auth.permitted_corpora(plugin_ctx.user_dict)
    candidates = list(self._raw_list(plugin_ctx).values())

    featured = []
    for corp in candidates:
        if not (corp.id in permitted_corpora and corp.metadata.featured):
            continue
        cinfo = plugin_ctx.corpus_manager.get_info(corp.id)
        exported = {
            # on client-side, this may contain also subc. id, aligned ids
            'id': corp.id,
            'corpus_id': corp.id,
            'name': cinfo.name,
            'size': cinfo.size,
            'size_info': l10n.simplify_num(cinfo.size),
            'description': self._export_untranslated_label(
                plugin_ctx, cinfo.description),
        }
        featured.append(exported)
    return featured
def __init__(self, id=None, corpus_id=None, name=None, description=None,
             size=0, path=None, featured=False, keywords=None):
    """
    Store the passed values as attributes of the same names.

    Additionally, size_info is derived from size via l10n.simplify_num(),
    found_in starts as an empty list and keywords defaults to a new
    empty list when not provided.
    """
    # plain copies of the arguments
    self.id = id
    self.corpus_id = corpus_id
    self.name = name
    self.description = description
    self.size = size
    # derived value
    self.size_info = l10n.simplify_num(size)
    self.path = path
    self.featured = featured
    self.found_in = []
    # a fresh list per instance unless the caller supplies one
    if keywords is None:
        self.keywords = []
    else:
        self.keywords = keywords
def __init__(self, data=None):
    """
    Initialize the item from a dict of values.

    Recognized keys: 'name', 'corpora', 'size', 'subcorpus_id',
    'subcorpus_orig_id' (defaults to the subcorpus_id value) and 'id'.
    A missing 'id' is replaced by the MD5 hex digest of self.sort_key.
    """
    values = {} if data is None else data

    self.name = values.get('name', 'New item')
    self.corpora = values.get('corpora', [])
    self.size = values.get('size', None)
    if self.size:
        self.size_info = l10n.simplify_num(self.size)
    else:
        self.size_info = None
    self.subcorpus_id = values.get('subcorpus_id', None)
    self.subcorpus_orig_id = values.get('subcorpus_orig_id', self.subcorpus_id)

    # the fallback identifier is computed even if 'id' is present
    fallback_ident = hashlib.md5(self.sort_key.encode()).hexdigest()
    self.ident = values.get('id', fallback_ident)
def _export_featured(self, user_id):
    """
    Export the featured corpora the given user is permitted to access.

    An item is exported when its "id" is a key of the permitted corpora
    and its metadata contain a truthy "featured" value. The exported "id"
    is the value the permitted corpora mapping holds for the corpus.

    returns:
    a list of dicts with keys "id", "name", "size" (a simplified number)
    and "description"
    """
    permitted_corpora = self._auth.permitted_corpora(user_id)
    featured = []
    for x in self._raw_list().values():
        corpus_id = x["id"]
        if corpus_id not in permitted_corpora:
            continue
        if not x["metadata"].get("featured", False):
            continue
        # a single lookup per corpus instead of one per exported attribute
        info = self._manatee_corpora.get_info(corpus_id)
        featured.append(
            {
                "id": permitted_corpora[corpus_id],
                "name": info.name,
                "size": l10n.simplify_num(info.size),
                "description": info.description,
            }
        )
    return featured
def _export_featured(self, plugin_api):
    """
    Export all the featured corpora the current user is permitted to access.

    arguments:
    plugin_api -- its user_dict and user_lang attributes are read

    returns:
    a list of dicts with keys 'id', 'corpus_id', 'name', 'size',
    'size_info' and 'description'
    """
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)

    def is_featured(o):
        return o['metadata'].get('featured', False)

    featured = []
    for x in self._raw_list(plugin_api.user_lang).values():
        if x['id'] in permitted_corpora and is_featured(x):
            # fetch the corpus info once and reuse it for all the attributes
            cinfo = self._manatee_corpora.get_info(x['id'])
            featured.append({
                # on client-side, this may contain also subc. id, aligned ids
                'id': x['id'],
                'corpus_id': x['id'],
                'name': cinfo.name,
                'size': cinfo.size,
                'size_info': l10n.simplify_num(cinfo.size),
                'description': self._export_untranslated_label(
                    plugin_api, cinfo.description)
            })
    return featured
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the list of permitted corpora.

    arguments:
    plugin_api -- passed to the corparch plugin; user_dict and user_lang are read
    query -- a query string; False means 'use default values' (an empty query)
    offset -- index of the first row to return (None is treated as 0)
    limit -- max. number of rows; None means the corparch max_page_size
    filter_dict -- optional filter values ('minSize', 'maxSize'); None is
                   treated as no filters

    returns:
    a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
    'current_keywords' and 'filters'
    """
    if query is False:  # False means 'use default values'
        query = ''
    if filter_dict is None:
        # the declared default must not crash the .get() calls below
        filter_dict = {}
    ans = {'rows': []}
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
    used_keywords = set()
    all_keywords_map = dict(self._corparch.all_keywords(plugin_api.user_lang))

    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None

    offset = 0 if offset is None else int(offset)
    if limit is None:
        limit = int(self._corparch.max_page_size)
    else:
        limit = int(limit)

    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]

    for corp in self._corparch.get_list(plugin_api, permitted_corpora):
        full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
        if isinstance(full_data, BrokenCorpusInfo):
            continue
        keywords = list(full_data['metadata']['keywords'].keys())
        tests = []
        found_in = []
        tests.extend([k in keywords for k in query_keywords])
        for s in normalized_query_substrs:
            # the name must be tested first to prevent the list 'found_in'
            # to be filled in case item matches both name and description
            if s in corp['name'].lower():
                tests.append(True)
            elif s in (corp['desc'].lower() if corp['desc'] else ''):
                tests.append(True)
                found_in.append('defaultCorparch__found_in_desc')
            else:
                tests.append(False)
        tests.append(self.matches_size(corp, min_size, max_size))
        tests.append(self._corparch.custom_filter(
            self._plugin_api, full_data, permitted_corpora))

        if self.matches_all(tests):
            corp['size_info'] = l10n.simplify_num(corp['size']) if corp['size'] else None
            corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
            corp['found_in'] = found_in
            corp['fav_id'] = fav_id(corp['id'])
            # because of client-side fav/feat/search items compatibility
            corp['corpus_id'] = corp['id']
            ans['rows'].append(corp)
            used_keywords.update(keywords)
            if not self.should_fetch_next(ans, offset, limit):
                break

    ans['rows'], ans['nextOffset'] = self.cut_result(
        self.sort(plugin_api, ans['rows']), offset, limit)
    ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
    ans['query'] = query
    ans['current_keywords'] = query_keywords
    ans['filters'] = dict(filter_dict)
    return ans
def v1(self, req):
    """
    Process an FCS request of protocol version 1.x and prepare the data
    for the response template.

    The operation is selected by the 'operation' URL argument: 'explain'
    (the default), 'scan' or 'searchRetrieve'. Errors raised during
    the processing are not propagated; they are reported via the 'message',
    'code', 'details' and 'msg' items of the returned dict. Diagnostics
    raised here as Exception(code, details, msg) keep their own values,
    anything else is reported with code 1 as a general system error.

    arguments:
    req -- request object; req.args, req.host_url and req.path are read

    returns:
    a dict of values for the response template
    """
    self._response.set_header('Content-Type', 'application/xml')
    current_version = 1.2

    default_corp_list = settings.get('corpora', 'default_corpora', [])
    corpname = None
    if 0 == len(default_corp_list):
        _logger.critical(
            'FCS cannot work properly without a default_corpora set')
    else:
        corpname = default_corp_list[0]

    pr = urllib.parse.urlparse(req.host_url)
    # None values should be filled in later
    data = {
        'corpname': corpname,
        'corppid': None,
        'version': current_version,
        'recordPacking': 'xml',
        'result': [],
        'operation': None,
        'numberOfRecords': 0,
        'server_name': pr.hostname,
        'server_port': pr.port or 80,
        'database': req.path,
        'maximumRecords': None,
        'maximumTerms': None,
        'startRecord': None,
        'responsePosition': None,
    }
    # supported parameters for all operations
    supported_args = [
        'operation', 'stylesheet', 'version', 'extraRequestData'
    ]

    def int_arg(name, default, positive=False):
        # Return the integer value of a URL argument (or the default one if
        # the argument is missing); invalid values produce diagnostic 6.
        value = req.args.get(name, default)
        if name not in req.args:
            return value
        try:
            value = int(value)
        except (TypeError, ValueError):
            raise Exception(6, name, 'Unsupported parameter value')
        if positive and value <= 0:
            raise Exception(6, name, 'Unsupported parameter value')
        return value

    try:
        # check operation
        operation = req.args.get('operation', 'explain')
        data['operation'] = operation

        # check version
        version = req.args.get('version', None)
        if version is not None and current_version < float(version):
            raise Exception(5, version, 'Unsupported version')

        # check integer parameters
        maximumRecords = int_arg('maximumRecords', 250, positive=True)
        data['maximumRecords'] = maximumRecords
        maximumTerms = int_arg('maximumTerms', 100)
        data['maximumTerms'] = maximumTerms
        startRecord = int_arg('startRecord', 1, positive=True)
        data['startRecord'] = startRecord
        responsePosition = int_arg('responsePosition', 0)
        data['responsePosition'] = responsePosition

        # set content-type in HTTP header
        recordPacking = req.args.get('recordPacking', 'xml')
        if recordPacking == 'xml':
            pass
        elif recordPacking == 'string':
            # TODO(jm)!!!
            self._response.set_header('Content-Type', 'text/plain; charset=utf-8')
        else:
            raise Exception(71, 'recordPacking', 'Unsupported record packing')

        # provide info about service
        if operation == 'explain':
            self._check_args(
                req, supported_args,
                ['recordPacking', 'x-fcs-endpoint-description'])
            corpus = self.cm.get_corpus(corpname)
            data['result'] = corpus.get_posattrs()
            data['numberOfRecords'] = len(data['result'])
            data['corpus_desc'] = 'Corpus {0} ({1} tokens)'.format(
                corpus.get_conf('NAME'), l10n.simplify_num(corpus.size))
            data['corpus_lang'] = Languages.get_iso_code(
                corpus.get_conf('LANGUAGE'))
            data['show_endpoint_desc'] = (
                req.args.get('x-fcs-endpoint-description', 'false') == 'true')

        # wordlist for a given attribute
        elif operation == 'scan':
            self._check_args(req, supported_args, [
                'scanClause', 'responsePosition', 'maximumTerms',
                'x-cmd-resource-info'
            ])
            data['resourceInfoRequest'] = req.args.get(
                'x-cmd-resource-info', '') == 'true'
            scanClause = req.args.get('scanClause', '')
            if scanClause.startswith('fcs.resource='):
                value = scanClause.split('=')[1]
                data['result'] = self._corpora_info(value, maximumTerms)
            else:
                data['result'] = conclib.fcs_scan(
                    corpname, scanClause, maximumTerms, responsePosition)

        # simple concordancer
        elif operation == 'searchRetrieve':
            # TODO we should review the args here (especially x-cmd-context, resultSetTTL)
            self._check_args(req, supported_args, [
                'query', 'startRecord', 'maximumRecords', 'recordPacking',
                'recordSchema', 'resultSetTTL', 'x-cmd-context',
                'x-fcs-context'
            ])
            if 'x-cmd-context' in req.args:
                req_corpname = req.args['x-cmd-context']
                user_corpora = plugins.runtime.AUTH.instance.permitted_corpora(
                    self.session_get('user'))
                if req_corpname in user_corpora:
                    corpname = req_corpname
                else:
                    _logger.warning(
                        'Requested unavailable corpus [%s], defaulting to [%s]',
                        req_corpname, corpname)
                data['corpname'] = corpname

            corp_conf_info = plugins.runtime.CORPARCH.instance.get_corpus_info(
                self._plugin_ctx, corpname)
            data['corppid'] = corp_conf_info.get('web', '')
            query = req.args.get('query', '')
            corpus = self.cm.get_corpus(corpname)
            if 0 == len(query):
                raise Exception(7, 'fcs_query', 'Mandatory parameter not supplied')
            data['result'], data['numberOfRecords'] = self.fcs_search(
                corpus, corpname, query, maximumRecords, startRecord)

        # unsupported operation
        else:
            # show within explain template
            data['operation'] = 'explain'
            raise Exception(4, '', 'Unsupported operation')

    # catch exception and amend diagnostics in template
    except Exception as e:
        data['message'] = ('error', repr(e))
        try:
            # an exception object itself cannot be unpacked; the diagnostic
            # triple (code, details, msg) is stored in its args
            data['code'], data['details'], data['msg'] = e.args
        except (ValueError, TypeError):
            data['code'], data['details'] = 1, repr(e)
            data['msg'] = 'General system error'

    return data
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpora list using query substrings, keywords and size filters.

    Keywords passed via the 'keyword' filter (mapped by the corparch plugin)
    take precedence over the ones contained in the query. Otherwise
    the keywords parsed from the query are stored in the session, or
    the stored ones are used when query is False and contains no keyword.

    NOTE(review): filter_dict defaults to None but it is dereferenced
    right away (getlist(), get()), so callers presumably always pass
    a multi-value dict - confirm.

    returns:
    a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
    'current_keywords' and 'filters'
    """
    external_keywords = filter_dict.getlist('keyword')
    external_keywords = self._corparch.map_external_keywords(
        external_keywords, plugin_api.user_lang)
    if len(external_keywords) != 0:
        # external keywords replace anything entered in the query
        query_substrs = []
        query_keywords = external_keywords + [self.default_label]
    else:
        if self.SESSION_KEYWORDS_KEY not in plugin_api.session:
            plugin_api.session[self.SESSION_KEYWORDS_KEY] = [
                self.default_label
            ]
        initial_query = query
        if query is False:
            query = ''
        query_substrs, query_keywords = parse_query(
            self._tag_prefix, query)
        if len(query_keywords) == 0 and initial_query is False:
            # no explicit query => reuse the keywords stored in the session
            query_keywords = plugin_api.session[self.SESSION_KEYWORDS_KEY]
        else:
            plugin_api.session[self.SESSION_KEYWORDS_KEY] = query_keywords
    # rebuild the query string from the substrings and prefixed keywords
    query = ' '.join(query_substrs) \
        + ' ' + ' '.join('%s%s' % (self._tag_prefix, s) for s in query_keywords)

    ans = {'rows': []}
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)

    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None

    sorting_field = filter_dict.get('sortBySize', 'name')

    if offset is None:
        offset = 0
    else:
        offset = int(offset)

    if limit is None:
        limit = int(self._corparch.max_page_size)
    else:
        limit = int(limit)

    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        # ident of the first single-corpus user item matching the corpus
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    # the rebuilt query is parsed again; this overwrites both values set above
    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    all_keywords_map = dict(
        self._corparch.all_keywords(plugin_api.user_lang))
    normalized_query_substrs = [s.lower() for s in query_substrs]
    used_keywords = set()

    for corp in self._corparch.get_list(plugin_api):
        full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
        if not isinstance(full_data, BrokenCorpusInfo):
            keywords = [k for k, _ in full_data.metadata.keywords]
            tests = []
            found_in = []
            tests.extend([k in keywords for k in query_keywords])
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data, permitted_corpora))

            if all(test for test in tests):
                # NOTE(review): the following self-assignment has no effect
                corp['size'] = corp['size']
                corp['size_info'] = l10n.simplify_num(
                    corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                corp['pmltq'] = full_data['pmltq']
                corp['repo'] = full_data['web']
                corp['access'] = full_data['access']
                corp['tokenConnect'] = full_data['token_connect'][
                    'providers']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                # stop early when should_fetch_next() reports enough rows
                if not self.should_fetch_next(ans, offset, limit):
                    break

    ans['rows'], ans['nextOffset'] = self.cut_result(
        self.sort(plugin_api, ans['rows'], field=sorting_field), offset, limit)
    ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
    ans['query'] = query
    ans['current_keywords'] = query_keywords
    ans['filters'] = dict(filter_dict)
    return ans
def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
    """
    Search the list of permitted corpora.

    arguments:
    plugin_api -- passed to the corparch plugin; user_dict and user_lang are read
    query -- a query string; False means 'use default values' (an empty query)
    offset -- index of the first row to return (None is treated as 0)
    limit -- max. number of rows; None means the corparch max_page_size
    filter_dict -- optional filter values ('minSize', 'maxSize', 'favOnly');
                   None is treated as no filters

    returns:
    a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
    'current_keywords' and 'filters'
    """
    if query is False:  # False means 'use default values'
        query = ''
    if filter_dict is None:
        # the declared default must not crash the .get() calls below
        filter_dict = {}
    ans = {'rows': []}
    permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
    used_keywords = set()
    all_keywords_map = dict(
        self._corparch.all_keywords(plugin_api.user_lang))

    if filter_dict.get('minSize'):
        min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
    else:
        min_size = 0
    if filter_dict.get('maxSize'):
        max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
    else:
        max_size = None
    if filter_dict.get('favOnly'):
        # the flag is expected in an integer form (e.g. '0' or '1')
        favourite_only = bool(int(filter_dict.get('favOnly')))
    else:
        favourite_only = False

    offset = 0 if offset is None else int(offset)
    if limit is None:
        limit = int(self._corparch.max_page_size)
    else:
        limit = int(limit)

    user_items = self._corparch.user_items.get_user_items(plugin_api)

    def fav_id(corpus_id):
        for item in user_items:
            if item.is_single_corpus and item.main_corpus_id == corpus_id:
                return item.ident
        return None

    query_substrs, query_keywords = parse_query(self._tag_prefix, query)
    normalized_query_substrs = [s.lower() for s in query_substrs]

    for corp in self._corparch.get_list(plugin_api, permitted_corpora):
        full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
        if isinstance(full_data, BrokenCorpusInfo):
            continue
        if favourite_only and fav_id(corp['id']) is None:
            continue
        keywords = [k for k, _ in full_data.metadata.keywords]
        tests = []
        found_in = []
        tests.extend([k in keywords for k in query_keywords])
        for s in normalized_query_substrs:
            # the name must be tested first to prevent the list 'found_in'
            # to be filled in case item matches both name and description
            if s in corp['name'].lower():
                tests.append(True)
            elif s in (corp['desc'].lower() if corp['desc'] else ''):
                tests.append(True)
                found_in.append('defaultCorparch__found_in_desc')
            else:
                tests.append(False)
        tests.append(self.matches_size(corp, min_size, max_size))
        tests.append(
            self._corparch.custom_filter(self._plugin_api, full_data, permitted_corpora))

        if self.matches_all(tests):
            corp['size_info'] = l10n.simplify_num(
                corp['size']) if corp['size'] else None
            corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
            corp['found_in'] = found_in
            corp['fav_id'] = fav_id(corp['id'])
            # because of client-side fav/feat/search items compatibility
            corp['corpus_id'] = corp['id']
            ans['rows'].append(corp)
            used_keywords.update(keywords)
            if not self.should_fetch_next(ans, offset, limit):
                break

    ans['rows'], ans['nextOffset'] = self.cut_result(
        self.sort(plugin_api, ans['rows']), offset, limit)
    ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
    ans['query'] = query
    ans['current_keywords'] = query_keywords
    ans['filters'] = dict(filter_dict)
    return ans
def search(self, user_id, query, offset=0, limit=None, filter_dict=None):
    """
    Search the corpora permitted to the user.

    A corpus is included when it contains all the query keywords, each
    query substring is found in its name or description, its size fits
    the 'minSize'/'maxSize' filters and custom_filter() accepts it.
    The rows are sorted by name and cut according to offset and limit
    (limit defaults to the configured max. page size).

    returns:
    a dict with keys "rows", "nextOffset", "keywords", "query" and "filters"
    """
    ans = {"rows": []}
    permitted_corpora = self._auth.permitted_corpora(user_id)
    user_items = self._user_items.get_user_items(user_id)
    used_keywords = set()
    all_keywords_map = dict(self.all_keywords)

    min_size = 0
    if filter_dict.get("minSize"):
        min_size = l10n.desimplify_num(filter_dict.get("minSize"), strict=False)
    max_size = None
    if filter_dict.get("maxSize"):
        max_size = l10n.desimplify_num(filter_dict.get("maxSize"), strict=False)

    corplist = self.get_list(permitted_corpora)

    offset = 0 if offset is None else int(offset)
    if limit is None:
        limit = self._max_page_size

    def cut_result(res):
        # returns the requested page and the offset of the next one (if any)
        if limit is None:
            return res, None
        right_lim = offset + int(limit)
        page = res[offset:right_lim]
        return page, (None if right_lim >= len(res) else right_lim)

    def is_fav(corpus_id):
        return any(
            isinstance(item, CorpusItem) and item.corpus_id == corpus_id
            for item in user_items
        )

    query_substrs, query_keywords = self._parse_query(query)

    def matches_size(d):
        item_size = d.get("size", None)
        if item_size is None:
            return False
        if min_size and int(item_size) < int(min_size):
            return False
        if max_size and int(item_size) > int(max_size):
            return False
        return True

    normalized_query_substrs = [s.lower() for s in query_substrs]

    for corp in corplist:
        full_data = self.get_corpus_info(corp["id"], self.getlocal("lang"))
        if isinstance(full_data, BrokenCorpusInfo):
            continue
        keywords = list(full_data["metadata"]["keywords"].keys())
        found_in = []
        hits = [k in keywords for k in query_keywords]
        for needle in normalized_query_substrs:
            # the name must be tested first to prevent the list 'found_in'
            # to be filled in case item matches both name and description
            if needle in corp["name"].lower():
                hits.append(True)
                continue
            description = corp["desc"].lower() if corp["desc"] else ""
            if needle in description:
                hits.append(True)
                found_in.append(_("description"))
            else:
                hits.append(False)
        hits.append(matches_size(corp))
        hits.append(self.custom_filter(full_data, permitted_corpora))

        if not all(hits):
            continue
        corp["raw_size"] = l10n.simplify_num(corp["size"]) if corp["size"] else None
        corp["keywords"] = [(k, all_keywords_map[k]) for k in keywords]
        corp["found_in"] = found_in
        corp["user_item"] = is_fav(corp["id"])
        self.customize_search_result_item(corp, full_data)
        ans["rows"].append(corp)
        used_keywords.update(keywords)

    def corp_cmp_key(c):
        # corpora without a name are sorted as if the name was empty
        name = c.get("name")
        return name if name is not None else ""

    sorted_rows = l10n.sort(ans["rows"], loc=self._lang(), key=corp_cmp_key)
    ans["rows"], ans["nextOffset"] = cut_result(sorted_rows)
    ans["keywords"] = l10n.sort(used_keywords, loc=self._lang())
    ans["query"] = query
    ans["filters"] = dict(filter_dict)
    return ans
def __post_init__(self):
    """
    Fill in size_info with the value of size processed
    by l10n.simplify_num().
    """
    simplified_size = l10n.simplify_num(self.size)
    self.size_info = simplified_size
def v1(self, req):
    """
    Process an FCS request of protocol version 1.x and prepare the data
    for the response template.

    The operation is selected by the 'operation' URL argument: 'explain'
    (the default), 'scan' or 'searchRetrieve'. Exceptions raised during
    the processing are caught here and reported via the 'message', 'code',
    'details' and 'msg' items of the returned dict.

    arguments:
    req -- request object; req.args, req.host_url and req.path are read

    returns:
    a dict of values for the response template
    """
    self._headers['Content-Type'] = 'application/xml'
    current_version = 1.2

    # the first configured default corpus is used unless the request
    # selects a different (permitted) one via x-cmd-context
    default_corp_list = settings.get('corpora', 'default_corpora', [])
    corpname = None
    if 0 == len(default_corp_list):
        _logger.critical('FCS cannot work properly without a default_corpora set')
    else:
        corpname = default_corp_list[0]

    pr = urlparse.urlparse(req.host_url)
    # None values should be filled in later
    data = {
        'corpname': corpname,
        'corppid': None,
        'version': current_version,
        'recordPacking': 'xml',
        'result': [],
        'operation': None,
        'numberOfRecords': 0,
        'server_name': pr.hostname,
        'server_port': pr.port or 80,
        'database': req.path,
        'maximumRecords': None,
        'maximumTerms': None,
        'startRecord': None,
        'responsePosition': None,
    }
    # supported parameters for all operations
    supported_args = ['operation', 'stylesheet', 'version', 'extraRequestData']

    try:
        # check operation
        operation = req.args.get('operation', 'explain')
        data['operation'] = operation

        # check version
        version = req.args.get('version', None)
        if version is not None and current_version < float(version):
            raise Exception(5, version, 'Unsupported version')

        # check integer parameters
        # (the bare except clauses below also catch the Exception raised
        # inside the same try block and replace it with an equal one)
        maximumRecords = req.args.get('maximumRecords', 250)
        if 'maximumRecords' in req.args:
            try:
                maximumRecords = int(maximumRecords)
                if maximumRecords <= 0:
                    raise Exception(6, 'maximumRecords', 'Unsupported parameter value')
            except:
                raise Exception(6, 'maximumRecords', 'Unsupported parameter value')
        data['maximumRecords'] = maximumRecords

        maximumTerms = req.args.get('maximumTerms', 100)
        if 'maximumTerms' in req.args:
            try:
                maximumTerms = int(maximumTerms)
            except:
                raise Exception(6, 'maximumTerms', 'Unsupported parameter value')
        data['maximumTerms'] = maximumTerms

        startRecord = req.args.get('startRecord', 1)
        if 'startRecord' in req.args:
            try:
                startRecord = int(startRecord)
                if startRecord <= 0:
                    raise Exception(6, 'startRecord', 'Unsupported parameter value')
            except:
                raise Exception(6, 'startRecord', 'Unsupported parameter value')
        data['startRecord'] = startRecord

        responsePosition = req.args.get('responsePosition', 0)
        if 'responsePosition' in req.args:
            try:
                responsePosition = int(responsePosition)
            except:
                raise Exception(6, 'responsePosition', 'Unsupported parameter value')
        data['responsePosition'] = responsePosition

        # set content-type in HTTP header
        recordPacking = req.args.get('recordPacking', 'xml')
        if recordPacking == 'xml':
            pass
        elif recordPacking == 'string':
            # TODO(jm)!!!
            self._headers['Content-Type'] = 'text/plain; charset=utf-8'
        else:
            raise Exception(71, 'recordPacking', 'Unsupported record packing')

        # provide info about service
        if operation == 'explain':
            self._check_args(
                req, supported_args,
                ['recordPacking', 'x-fcs-endpoint-description']
            )
            corpus = self.cm.get_Corpus(corpname)
            import_str = partial(l10n.import_string, from_encoding=corpus.get_conf('ENCODING'))
            # the result is the list of the corpus attributes (ATTRLIST)
            data['result'] = corpus.get_conf('ATTRLIST').split(',')
            data['numberOfRecords'] = len(data['result'])
            data['corpus_desc'] = u'Corpus {0} ({1} tokens)'.format(
                import_str(corpus.get_conf('NAME')), l10n.simplify_num(corpus.size()))
            data['corpus_lang'] = Languages.get_iso_code(corpus.get_conf('LANGUAGE'))
            data['show_endpoint_desc'] = (True if req.args.get('x-fcs-endpoint-description', 'false') == 'true'
                                          else False)

        # wordlist for a given attribute
        elif operation == 'scan':
            self._check_args(
                req, supported_args,
                ['scanClause', 'responsePosition', 'maximumTerms', 'x-cmd-resource-info']
            )
            data['resourceInfoRequest'] = req.args.get('x-cmd-resource-info', '') == 'true'
            scanClause = req.args.get('scanClause', '')
            if scanClause.startswith('fcs.resource='):
                # a scan of available resources; the text between the first
                # and the second '=' (if any) is used as the value
                value = scanClause.split('=')[1]
                data['result'] = self._corpora_info(value, maximumTerms)
            else:
                data['result'] = conclib.fcs_scan(
                    corpname, scanClause, maximumTerms, responsePosition)

        # simple concordancer
        elif operation == 'searchRetrieve':
            # TODO we should review the args here (especially x-cmd-context, resultSetTTL)
            self._check_args(
                req, supported_args,
                ['query', 'startRecord', 'maximumRecords', 'recordPacking',
                 'recordSchema', 'resultSetTTL', 'x-cmd-context', 'x-fcs-context']
            )
            if 'x-cmd-context' in req.args:
                req_corpname = req.args['x-cmd-context']
                user_corpora = plugins.runtime.AUTH.instance.permitted_corpora(
                    self.session_get('user'))
                # a corpus the user cannot access is replaced by the default one
                if req_corpname in user_corpora:
                    corpname = req_corpname
                else:
                    _logger.warning(
                        'Requested unavailable corpus [%s], defaulting to [%s]', req_corpname, corpname)
                data['corpname'] = corpname

            corp_conf_info = plugins.runtime.CORPARCH.instance.get_corpus_info('en_US', corpname)
            data['corppid'] = corp_conf_info.get('web', '')
            query = req.args.get('query', '')
            corpus = self.cm.get_Corpus(corpname)
            if 0 == len(query):
                raise Exception(7, 'fcs_query', 'Mandatory parameter not supplied')
            data['result'], data['numberOfRecords'] = self.fcs_search(
                corpus, corpname, query, maximumRecords, startRecord)

        # unsupported operation
        else:
            # show within explain template
            data['operation'] = 'explain'
            raise Exception(4, '', 'Unsupported operation')

    # catch exception and amend diagnostics in template
    except Exception as e:
        data['message'] = ('error', repr(e))
        try:
            # NOTE(review): unpacking the exception object itself presumably
            # relies on Python 2 semantics (the urlparse module and u''
            # literals suggest Python 2) - confirm the target interpreter
            data['code'], data['details'], data['msg'] = e
        except ValueError:
            # not a (code, details, msg) triple => report a general error
            data['code'], data['details'] = 1, repr(e)
            data['msg'] = 'General system error'

    return data