def if_no_such_template_redirect(template_name: str) -> Optional[RRV]:
    """Return a response if ``template_name`` does not directly name a template.

    Looks ``template_name`` up in ``templates`` and handles three cases:

    * unknown name: renders ``no-such-template.html``;
    * the entry is a string (name of another template): redirects (307)
      to the current endpoint with ``template_name`` replaced by that
      string, keeping the other view arguments and the query string;
    * the entry is a list (names of several candidate templates): sets
      the interface language from the first candidate and renders
      ``ambiguous-template.html``.

    Returns ``None`` for any other kind of entry, in which case the
    caller carries on with the template.
    """
    if template_name not in templates:
        return flask.render_template(
            'no-such-template.html',
            template_name=template_name,
        )

    entry = templates[template_name]

    if isinstance(entry, str):
        url_values = dict(
            cast(dict[str, Any], flask.request.view_args),
            template_name=entry,
        )
        # Query arguments are merged in *after* the view arguments and
        # never override them: passing both as separate ** expansions
        # raises TypeError (duplicate keyword argument) as soon as the
        # query string contains a key that is also a view argument,
        # e.g. ``?template_name=...``.
        for key, values in flask.request.args.to_dict(flat=False).items():
            url_values.setdefault(key, values)
        return flask.redirect(
            flask.url_for(cast(str, flask.request.endpoint), **url_values),
            code=307,
        )

    if isinstance(entry, list):
        replacement_templates = [
            templates_without_redirects[replacement_name]
            for replacement_name in entry
        ]
        # the page is shown in the language of the first candidate
        flask.g.interface_language_code = lang_lex2int(replacement_templates[0]['language_code'])
        return flask.render_template(
            'ambiguous-template.html',
            template_name=template_name,
            replacement_templates=replacement_templates,
        )

    return None
def term_span(term: Term) -> flask.Markup:
    """Wrap the value of ``term`` in a ``<span>`` with ``lang`` and ``dir`` attributes.

    The attributes are derived from the interface language code that
    ``lang_lex2int`` returns for ``term['language']``.
    All interpolated values are HTML-escaped.
    """
    interface_language_code = lang_lex2int(term['language'])
    # Markup.format() escapes its arguments before substituting them
    span_template = flask.Markup(r'<span lang="{lang}" dir="{dir}">{value}</span>')
    return span_template.format(
        lang=lang_int2html(interface_language_code),
        dir=text_direction(interface_language_code),
        value=term['value'],
    )
def process_template_edit(template_name: str, lexeme_id: str) -> RRV:
    """Handle a request to edit the lexeme ``lexeme_id`` using the template ``template_name``.

    Loads the lexeme data, binds the lexeme's forms to the template,
    and, for a POST in edit mode with a matching CSRF token (and not
    read-only), applies the submitted form data to the lexeme: with
    OAuth configured the lexeme is submitted and the client redirected
    (303), otherwise the summary is printed and the lexeme data is
    returned as JSON.

    NOTE(review): the visible body ends inside the final loop without
    a return statement; the remainder of the function is presumably
    outside this view -- confirm against the full file.
    """
    # unknown, redirected or ambiguous template names are answered directly
    response = if_no_such_template_redirect(template_name)
    if response:
        return response

    template = templates_without_redirects[template_name]
    template_language_code = template['language_code']
    flask.g.interface_language_code = lang_lex2int(template_language_code)
    # the language of the representations can be overridden via the query string
    representation_language_code = flask.request.args.get('language_code', template_language_code)
    wiki = 'test' if 'test' in template else 'www'

    if flask.request.method == 'POST':
        # on POST, load the revision named in the submitted form
        lexeme_revision = flask.request.form['_lexeme_revision']
        lexeme_data = get_lexeme_data(lexeme_id, wiki, lexeme_revision)
    else:
        # otherwise load without a revision and remember the one we got
        lexeme_data = get_lexeme_data(lexeme_id, wiki)
        lexeme_revision = str(lexeme_data['lastrevid'])

    lexeme_match = match_template_to_lexeme_data(template, lexeme_data)
    # NOTE(review): not used in the visible part of this function
    lexeme_matches_template = (
        lexeme_match['language'] and
        lexeme_match['lexical_category'] and
        not lexeme_match['conflicting_statements']
    )
    template = match_lexeme_forms_to_template(lexeme_data['forms'], template)
    template = cast(BoundTemplate, template)
    template['lexeme_id'] = lexeme_id
    template['lexeme_revision'] = lexeme_revision

    # read-only: OAuth is configured but the session has no access token
    readonly = 'OAUTH' in app.config and 'oauth_access_token' not in flask.session

    if (flask.request.method == 'POST' and
            '_edit_mode' in flask.request.form and
            csrf_token_matches(flask.request.form) and
            not readonly):
        form_data = flask.request.form
        lexeme_data = update_lexeme(lexeme_data, template, form_data, representation_language_code, missing_statements=lexeme_match['missing_statements'])
        summary = build_summary(template, form_data)

        if 'OAUTH' in app.config:
            lexeme_id, lexeme_uri = submit_lexeme(template, lexeme_data, summary)
            target = add_hash_to_uri(lexeme_uri, form_data.get('target_hash'))
            return flask.redirect(target, code=303)
        else:
            # without OAuth configuration nothing is submitted
            print(summary)
            return flask.jsonify(lexeme_data)

    for template_form in template['forms']:
        template_form = cast(MatchedTemplateForm, template_form)
        if lexeme_forms := template_form.get('lexeme_forms'):
            template_form = cast(EditedTemplateForm, template_form)
            # join the representations (in the selected language) of all
            # lexeme forms bound to this template form with '/'
            template_form['value'] = '/'.join(lexeme_form['representations'][representation_language_code]['value']
                                              for lexeme_form in lexeme_forms
                                              if representation_language_code in lexeme_form['representations'])
def test_translations_available():
    """Check that every template's interface language has translations.

    The only tolerated gap is ``'zh'``.
    """
    interface_language_codes = (
        lang_lex2int(template['language_code'])
        for template in templates.templates_without_redirects.values()
    )
    missing_language_codes = {
        language_code
        for language_code in interface_language_codes
        if language_code not in translations.translations
    }
    # language code disabled on translatewiki.net, needs more consideration?
    # (remove() raises KeyError if 'zh' is not actually among the missing codes)
    missing_language_codes.remove('zh')
    assert not missing_language_codes
def get_duplicates_api(wiki: str, language_code: str, lemma: str) -> RRV:
    """Respond with the duplicates found for ``lemma`` in ``language_code`` on ``wiki``.

    Returns an empty 204 response when there are no matches, rendered
    HTML when the client accepts HTML, and JSON otherwise.
    """
    flask.g.interface_language_code = lang_lex2int(language_code)

    matches = get_duplicates(wiki, language_code, lemma)
    if not matches:
        return flask.Response(status=204)

    if not flask.request.accept_mimetypes.accept_html:
        return flask.jsonify(matches)

    template_name = flask.request.args.get('template_name')
    return render_duplicates(
        matches,
        in_bulk_mode=False,
        template_name=template_name,
    )
def process_template_advanced(template_name: str, advanced: bool = True) -> RRV:
    """Show the form for ``template_name``, or create a lexeme from a submitted one.

    A request counts as a submission when it is a POST whose
    ``_advanced_mode`` field equals ``str(advanced)`` and the app is not
    read-only. A submission is first passed through the duplicates and
    CSRF checks; then the lexeme is built and either submitted
    (OAuth configured, followed by a 303 redirect) or returned as JSON
    after printing the summary. Any other request renders
    ``template.html``, prefilled from the form data or, if there is
    none, from the query string.
    """
    response = if_no_such_template_redirect(template_name)
    if response:
        return response

    template = templates_without_redirects[template_name]
    flask.g.interface_language_code = lang_lex2int(template['language_code'])
    form_data = flask.request.form  # type: werkzeug.datastructures.MultiDict

    # read-only: OAuth is configured but the session has no access token
    readonly = 'OAUTH' in app.config and 'oauth_access_token' not in flask.session

    is_submission = (
        flask.request.method == 'POST'
        and form_data.get('_advanced_mode', 'None') == str(advanced)
        and not readonly
    )

    if not is_submission:
        if not form_data:
            form_data = flask.request.args
        return flask.render_template(
            'template.html',
            template=add_form_data_to_template(form_data, template),
            lemmas=build_lemmas(template, form_data),
            lexeme_id=form_data.get('lexeme_id'),
            advanced=advanced,
            can_use_bulk_mode=can_use_bulk_mode(),
            readonly=readonly,
        )

    # each check may short-circuit the submission with its own response
    for check in (if_has_duplicates_redirect, if_needs_csrf_redirect):
        response = check(template, advanced, form_data)
        if response:
            return response

    lexeme_data = build_lexeme(template, form_data)
    summary = build_summary(template, form_data)

    if 'OAUTH' not in app.config:
        # without OAuth configuration nothing is submitted
        print(summary)
        return flask.jsonify(lexeme_data)

    _lexeme_id, lexeme_uri = submit_lexeme(template, lexeme_data, summary)
    target = add_hash_to_uri(lexeme_uri, form_data.get('target_hash'))
    return flask.redirect(target, code=303)
def language_name_with_code(language_code: str) -> flask.Markup:
    """Return markup of the form "language name (code)" for ``language_code``.

    The name is the autonym if there is one, otherwise the label. If
    neither is available, only the code is returned, in a
    ``<span lang=zxx>``. Otherwise the whole text is wrapped in a
    ``<span>`` whose ``lang`` and ``dir`` attributes come from the
    interface language code for ``language_code``.
    All interpolated values are HTML-escaped.
    """
    # Markup.format() escapes its arguments before substituting them
    code_zxx = flask.Markup(r'<span lang=zxx>{}</span>').format(language_code)

    language_name = autonym(language_code)
    if language_name is None:
        language_name = label(language_code)
    if language_name is None:
        return code_zxx

    interface_language_code = lang_lex2int(language_code)
    # code_zxx is already Markup, so it is inserted without further escaping
    span_template = flask.Markup(r'<span lang="{lang}" dir="{dir}">{name} ({code})</span>')
    return span_template.format(
        lang=lang_int2html(interface_language_code),
        dir=text_direction(interface_language_code),
        name=language_name,
        code=code_zxx,
    )
def add_labels_to_lexeme_forms_grammatical_features(session, language, lexeme_forms):
    """Add the labels of each form's grammatical features, in place.

    For every entry of ``lexeme_forms``, a ``'grammaticalFeatures_labels'``
    list is stored that parallels its ``'grammaticalFeatures'`` list of
    item IDs. Each label is the term object returned by the
    ``wbgetentities`` API; if the API returned no label for an item,
    the item ID itself is used, tagged with the language ``'zxx'``.

    :param session: object whose ``get(**params)`` performs an API
        request and returns the decoded response
    :param language: language code; converted with ``lang_lex2int``
        before being sent to the API
    :param lexeme_forms: re-iterable collection of form dicts, each
        with a ``'grammaticalFeatures'`` list; modified in place
    :raises KeyError: if the API response lacks one of the requested items
    """
    item_ids = list({
        item_id
        for lexeme_form in lexeme_forms
        for item_id in lexeme_form['grammaticalFeatures']
    })

    # The labels in the response are looked up under the same code that
    # was requested; looking them up under the unconverted ``language``
    # misses every label whenever the two codes differ.
    api_language_code = lang_lex2int(language)

    labels_map = {}  # item ID to label
    for offset in range(0, len(item_ids), 50):  # request at most 50 IDs at a time
        chunk = item_ids[offset:offset + 50]
        response = session.get(action='wbgetentities',
                               ids=chunk,
                               props=['labels'],
                               languages=[api_language_code],
                               languagefallback=1,  # TODO use True once mediawiki-utilities/python-mwapi#38 is in a released version
                               formatversion=2)
        for item_id, item in response['entities'].items():
            labels_map[item_id] = item['labels'].get(api_language_code, {'language': 'zxx', 'value': item_id})

    for lexeme_form in lexeme_forms:
        lexeme_form['grammaticalFeatures_labels'] = [
            labels_map[item_id]
            for item_id in lexeme_form['grammaticalFeatures']
        ]
def get_duplicates(wiki: str, language_code: str, lemma: str) -> list[Duplicate]:
    """Search ``wiki`` for existing lexemes with the lemma ``lemma`` in ``language_code``.

    A search result counts as a match if its label equals ``lemma``
    exactly and its match language is ``language_code`` (or ``'und'``,
    when ``language_code`` is longer than two characters). Each match
    carries the ID, URI, label and description from the search result,
    plus the forms and senses counts read from the page props.
    """
    session = anonymous_session(f'https://{wiki}.wikidata.org')
    api_language_code = lang_lex2int(language_code)

    search_response = session.get(
        action='wbsearchentities',
        search=lemma,
        language=api_language_code,
        uselang=api_language_code,  # for the result descriptions
        type='lexeme',
        limit=50,
    )

    # T230833: longer language codes may be reported as 'und'
    if len(language_code) > 2:
        accepted_match_languages = (language_code, 'und')
    else:
        accepted_match_languages = (language_code,)

    matches: dict[str, Duplicate] = {}
    for result in search_response['search']:
        if result.get('label') != lemma:
            continue
        if result['match']['language'] not in accepted_match_languages:
            continue
        match = {
            'id': result['id'],
            'uri': result['concepturi'],
            'label': result['label'],
            'description': result['description'],
        }
        # missing forms_count, senses_count added below
        matches[result['id']] = cast(Duplicate, match)

    if not matches:
        return []

    # no, this can’t be combined with the previous call by using
    # generator=wbsearch – then we don’t get the match language
    props_response = session.get(
        action='query',
        titles=['Lexeme:' + lexeme_id for lexeme_id in matches],
        prop=['pageprops'],
        ppprop=['wbl-forms', 'wbl-senses'],
    )
    title_prefix_length = len('Lexeme:')
    for page in props_response['query']['pages'].values():
        lexeme_id = page['title'][title_prefix_length:]
        pageprops = page.get('pageprops', {})
        matches[lexeme_id]['forms_count'] = pageprops.get('wbl-forms')
        matches[lexeme_id]['senses_count'] = pageprops.get('wbl-senses')

    # list() to turn the dict values view (not JSON serializable) into a plain list
    return list(matches.values())
def render_advanced_partial_forms_hint(language_code: str) -> RRV:
    """Render ``advanced_partial_forms_hint.html`` in the interface language for ``language_code``."""
    interface_language_code = lang_lex2int(language_code)
    flask.g.interface_language_code = interface_language_code
    return flask.render_template('advanced_partial_forms_hint.html')
def render_no_duplicate(language_code: str) -> RRV:
    """Render ``no_duplicate.html`` in the interface language for ``language_code``."""
    interface_language_code = lang_lex2int(language_code)
    flask.g.interface_language_code = interface_language_code
    return flask.render_template('no_duplicate.html')
def process_template_bulk(template_name: str) -> RRV:
    """Show the bulk-mode form for ``template_name``, or process a submitted one.

    A POST that contains ``_bulk_mode``, has a matching CSRF token and
    arrives while the app is not read-only is processed: the
    ``lexemes`` field is parsed, and each parsed lexeme is either
    reported as having duplicates or built and (with OAuth configured)
    submitted. The outcome is rendered with ``bulk-result.html``, or
    returned as JSON when OAuth is not configured. Parse errors
    re-render ``bulk.html`` with the error message and the submitted
    text.

    Any other request renders ``bulk.html`` with a placeholder built
    from the template's forms and, for a POST, a value carried over
    from the submitted form.
    """
    # unknown, redirected or ambiguous template names are answered directly
    response = if_no_such_template_redirect(template_name)
    if response:
        return response

    template = templates_without_redirects[template_name]
    flask.g.interface_language_code = lang_lex2int(template['language_code'])

    # read-only: OAuth is configured but the session has no access token
    readonly = 'OAUTH' in app.config and 'oauth_access_token' not in flask.session

    # in read-only mode the form is still shown even if bulk mode is not allowed
    if not can_use_bulk_mode() and not readonly:
        return flask.render_template(
            'bulk-not-allowed.html',
        )

    if (flask.request.method == 'POST' and
            '_bulk_mode' in flask.request.form and
            csrf_token_matches(flask.request.form) and
            not readonly):

        form_data = flask.request.form
        parse_error = None
        show_optional_forms_hint = False
        # turn each known parse failure into a message for the user
        try:
            lexemes = parse_lexemes(form_data['lexemes'], template)
        except FirstFieldNotLexemeIdError as error:
            parse_error = message_with_kwargs(
                'bulk_first_field_not_lexeme_id',
                num_forms=error.num_forms,
                num_fields=error.num_fields,
                first_field=error.first_field,
                line_number=error.line_number,
            )
        except FirstFieldLexemeIdError as error:
            parse_error = message_with_kwargs(
                'bulk_first_field_lexeme_id',
                num_forms=error.num_forms,
                num_fields=error.num_fields,
                first_field=error.first_field,
                line_number=error.line_number,
            )
        except WrongNumberOfFieldsError as error:
            # the hint is only shown when there were too few fields
            show_optional_forms_hint = error.num_fields < error.num_forms
            parse_error = message_with_kwargs(
                'bulk_wrong_number_of_fields',
                num_forms=error.num_forms,
                num_fields=error.num_fields,
                line_number=error.line_number,
            )
        except ValueError as error:
            # any other ValueError: show its text as is
            parse_error = str(error)
        if parse_error is not None:
            # re-render the form with the submitted text so it can be corrected
            return flask.render_template(
                'bulk.html',
                template=template,
                value=form_data['lexemes'],
                parse_error=parse_error,
                show_optional_forms_hint=show_optional_forms_hint,
            )

        results = []  # type: list[dict]

        for lexeme in lexemes:
            # duplicates are only searched for entries without a lexeme ID
            if not lexeme.get('lexeme_id'):
                duplicates = find_duplicates(template, lexeme)
                if duplicates:
                    # report the duplicates instead of building this lexeme
                    results.append({
                        'duplicates': duplicates,
                        'form_representations': lexeme.getlist('form_representation'),
                    })
                    continue
            lexeme_data = build_lexeme(template, lexeme)
            # note: the summary is built from the whole request form, not from this lexeme
            summary = build_summary(template, form_data)

            if 'OAUTH' in app.config:
                lexeme_id, lexeme_uri = submit_lexeme(template, lexeme_data, summary)
                results.append({
                    'lexeme_data': lexeme_data,
                    'lexeme_id': lexeme_id,
                    'lexeme_uri': lexeme_uri,
                })
            else:
                # without OAuth configuration nothing is submitted
                print(summary)
                results.append({
                    'lexeme_data': lexeme_data,
                })

        if 'OAUTH' in app.config:
            return flask.render_template(
                'bulk-result.html',
                template=template,
                results=results,
            )
        else:
            return flask.jsonify(results)

    else:
        # build the placeholder: the forms' example placeholders separated
        # by '|' (the separator is only added once the placeholder is
        # non-empty), followed by a line with '...'
        placeholder = ''
        for form in template['forms']:
            if placeholder:
                placeholder += '|'
            (prefix, form_placeholder, suffix) = split_example(form)
            placeholder += form_placeholder
        placeholder += '\n...'
        csrf_error = False

        if flask.request.method == 'POST':
            form_data = flask.request.form
            if 'form_representation' in form_data:
                # user came from non-bulk mode
                representations = form_data.getlist('form_representation')
                value = '|'.join(representations)
                if value == '|' * (len(representations) - 1):
                    # ...but had not typed anything into non-bulk mode yet,
                    # clear the value so that the placeholder is shown
                    value = ''
                else:
                    value += '\n'  # for convenience when adding more
            else:
                # user came from bulk mode with CSRF error
                value = form_data['lexemes']
                csrf_error = True
        else:
            value = None

        return flask.render_template(
            'bulk.html',
            template=template,
            placeholder=placeholder,
            value=value,
            csrf_error=csrf_error,
            show_optional_forms_hint=False,
            readonly=readonly,
        )