Пример #1
0
def if_no_such_template_redirect(template_name: str) -> Optional[RRV]:
    """Return an early response if ``template_name`` is not a real template.

    Three cases produce a response:

    * the name is not in ``templates``: the "no such template" page;
    * the entry is a string (another template name): a 307 redirect to the
      current endpoint with ``template_name`` replaced by that string,
      keeping the other view arguments and the query string;
    * the entry is a list of template names: the "ambiguous template" page
      listing those templates.

    In every other case ``None`` is returned and the caller carries on.
    """
    if template_name not in templates:
        return flask.render_template(
            'no-such-template.html',
            template_name=template_name,
        )

    entry = templates[template_name]

    if isinstance(entry, str):
        # Same endpoint, same arguments, only the template name is swapped.
        endpoint = cast(str, flask.request.endpoint)
        view_args = dict(cast(dict[str, Any], flask.request.view_args))
        view_args['template_name'] = entry
        query_args = flask.request.args.to_dict(flat=False)
        target = flask.url_for(endpoint, **view_args, **query_args)
        return flask.redirect(target, code=307)

    if isinstance(entry, list):
        replacement_templates = [
            templates_without_redirects[replacement_name]
            for replacement_name in entry
        ]
        # The page is shown in the language of the first candidate.
        first_language_code = replacement_templates[0]['language_code']
        flask.g.interface_language_code = lang_lex2int(first_language_code)
        return flask.render_template(
            'ambiguous-template.html',
            template_name=template_name,
            replacement_templates=replacement_templates,
        )

    return None
Пример #2
0
def term_span(term: Term) -> flask.Markup:
    """Wrap the value of ``term`` in a ``<span>`` with ``lang`` and ``dir``.

    Both attributes are derived from ``term['language']`` via
    ``lang_lex2int``. All interpolated values are HTML-escaped.
    """
    interface_language_code = lang_lex2int(term['language'])
    # Markup.format() escapes every argument that is not already markup.
    return flask.Markup(r'<span lang="{}" dir="{}">{}</span>').format(
        lang_int2html(interface_language_code),
        text_direction(interface_language_code),
        term['value'],
    )
Пример #3
0
def process_template_edit(template_name: str, lexeme_id: str) -> RRV:
    """Handle the edit view for an existing lexeme with the given template.

    A POST request that carries ``_edit_mode``, passes the CSRF check and is
    not read-only updates the lexeme data from the form. With OAuth
    configured the result is submitted and the client is redirected (303) to
    the lexeme URI; without it the summary is printed and the lexeme data is
    returned as JSON.

    In all other cases the template forms are bound to the lexeme's forms
    and their ``value`` is filled in from the existing representations.

    NOTE(review): as visible here, the function ends after that loop without
    an explicit return, and ``lexeme_matches_template`` is computed but never
    used -- the rest of the function (presumably a final render) appears to
    be missing from this excerpt; confirm against the full file.
    """
    # Unknown, redirected and ambiguous template names are handled up front.
    response = if_no_such_template_redirect(template_name)
    if response:
        return response

    template = templates_without_redirects[template_name]
    template_language_code = template['language_code']
    flask.g.interface_language_code = lang_lex2int(template_language_code)
    # The representation language may be overridden with ?language_code=...
    representation_language_code = flask.request.args.get('language_code', template_language_code)
    # Templates that contain a 'test' key use the 'test' wiki, all others 'www'.
    wiki = 'test' if 'test' in template else 'www'

    if flask.request.method == 'POST':
        # On POST, load the revision named in the form's _lexeme_revision field.
        lexeme_revision = flask.request.form['_lexeme_revision']
        lexeme_data = get_lexeme_data(lexeme_id, wiki, lexeme_revision)
    else:
        # Otherwise load without a revision argument and remember 'lastrevid'.
        lexeme_data = get_lexeme_data(lexeme_id, wiki)
        lexeme_revision = str(lexeme_data['lastrevid'])

    lexeme_match = match_template_to_lexeme_data(template, lexeme_data)
    # True-ish only if language and lexical category match and there are no
    # conflicting statements.
    lexeme_matches_template = (
        lexeme_match['language'] and
        lexeme_match['lexical_category'] and
        not lexeme_match['conflicting_statements']
    )
    # Rebind `template` to the result of matching the lexeme's forms to it,
    # then record which lexeme and revision it was matched against.
    template = match_lexeme_forms_to_template(lexeme_data['forms'], template)
    template = cast(BoundTemplate, template)
    template['lexeme_id'] = lexeme_id
    template['lexeme_revision'] = lexeme_revision

    # Read-only: OAuth is configured but the session has no access token.
    readonly = 'OAUTH' in app.config and 'oauth_access_token' not in flask.session

    if (flask.request.method == 'POST' and
            '_edit_mode' in flask.request.form and
            csrf_token_matches(flask.request.form) and
            not readonly):
        form_data = flask.request.form
        lexeme_data = update_lexeme(lexeme_data, template, form_data, representation_language_code, missing_statements=lexeme_match['missing_statements'])
        summary = build_summary(template, form_data)

        if 'OAUTH' in app.config:
            # Note: rebinds the lexeme_id parameter to the submitted lexeme's ID.
            lexeme_id, lexeme_uri = submit_lexeme(template, lexeme_data, summary)
            target = add_hash_to_uri(lexeme_uri, form_data.get('target_hash'))
            return flask.redirect(target, code=303)
        else:
            # No OAuth configured: nothing is submitted, the data is echoed back.
            print(summary)
            return flask.jsonify(lexeme_data)

    for template_form in template['forms']:
        template_form = cast(MatchedTemplateForm, template_form)
        if lexeme_forms := template_form.get('lexeme_forms'):
            template_form = cast(EditedTemplateForm, template_form)
            # Join the representations of all matched lexeme forms that have
            # one in the representation language, separated by '/'.
            template_form['value'] = '/'.join(lexeme_form['representations'][representation_language_code]['value']
                                              for lexeme_form in lexeme_forms
                                              if representation_language_code in lexeme_form['representations'])
Пример #4
0
def test_translations_available():
    """Every template's interface language must have translations.

    ``'zh'`` is exempt. ``set.remove`` is used for that exemption, so the
    test raises ``KeyError`` if ``'zh'`` is not among the missing codes.
    """
    required_language_codes = {
        lang_lex2int(template['language_code'])
        for template in templates.templates_without_redirects.values()
    }
    missing_language_codes = {
        language_code
        for language_code in required_language_codes
        if language_code not in translations.translations
    }

    # language code disabled on translatewiki.net, needs more consideration?
    missing_language_codes.remove('zh')

    assert not missing_language_codes
Пример #5
0
def get_duplicates_api(wiki: str, language_code: str, lemma: str) -> RRV:
    """Return the duplicates of ``lemma`` as HTML or JSON.

    Responds with an empty 204 if there are no duplicates. Otherwise the
    duplicates are rendered via ``render_duplicates`` when the client
    accepts HTML, and returned as JSON when it does not.
    """
    flask.g.interface_language_code = lang_lex2int(language_code)

    matches = get_duplicates(wiki, language_code, lemma)
    if not matches:
        return flask.Response(status=204)

    if not flask.request.accept_mimetypes.accept_html:
        return flask.jsonify(matches)

    return render_duplicates(
        matches,
        in_bulk_mode=False,
        template_name=flask.request.args.get('template_name'),
    )
Пример #6
0
def process_template_advanced(template_name: str, advanced: bool = True) -> RRV:
    """Handle the (advanced or regular) creation form for a template.

    A POST whose ``_advanced_mode`` field equals ``str(advanced)``, made
    while not read-only, is treated as a submission. It first passes through
    the duplicate and CSRF checks, either of which may return a response of
    its own. Then the lexeme is built and either submitted (OAuth
    configured, followed by a 303 redirect to the lexeme URI) or echoed back
    as JSON after printing the summary.

    Any other request renders ``template.html``, prefilled from the form
    data, or from the query string if the form data is empty.
    """
    early_response = if_no_such_template_redirect(template_name)
    if early_response:
        return early_response

    template = templates_without_redirects[template_name]
    flask.g.interface_language_code = lang_lex2int(template['language_code'])
    form_data = flask.request.form  # type: werkzeug.datastructures.MultiDict

    # Read-only: OAuth is configured but the session has no access token.
    readonly = 'OAUTH' in app.config and 'oauth_access_token' not in flask.session

    is_submission = (
        flask.request.method == 'POST' and
        form_data.get('_advanced_mode', 'None') == str(advanced) and
        not readonly
    )

    if not is_submission:
        if not form_data:
            form_data = flask.request.args
        return flask.render_template(
            'template.html',
            template=add_form_data_to_template(form_data, template),
            lemmas=build_lemmas(template, form_data),
            lexeme_id=form_data.get('lexeme_id'),
            advanced=advanced,
            can_use_bulk_mode=can_use_bulk_mode(),
            readonly=readonly,
        )

    # Each check may short-circuit the submission with its own response;
    # duplicates are checked before the CSRF token.
    for check in (if_has_duplicates_redirect, if_needs_csrf_redirect):
        check_response = check(template, advanced, form_data)
        if check_response:
            return check_response

    lexeme_data = build_lexeme(template, form_data)
    summary = build_summary(template, form_data)

    if 'OAUTH' not in app.config:
        print(summary)
        return flask.jsonify(lexeme_data)

    lexeme_id, lexeme_uri = submit_lexeme(template, lexeme_data, summary)
    target = add_hash_to_uri(lexeme_uri, form_data.get('target_hash'))
    return flask.redirect(target, code=303)
Пример #7
0
def language_name_with_code(language_code: str) -> flask.Markup:
    """Return markup for a language name followed by its code.

    The name is the result of ``autonym``, falling back to ``label``. If
    both are ``None``, only the code is returned, wrapped in a
    ``lang=zxx`` span. All interpolated values are HTML-escaped.
    """
    code_zxx = flask.Markup(r'<span lang=zxx>{}</span>').format(language_code)

    language_name = autonym(language_code)
    if language_name is None:
        language_name = label(language_code)
    if language_name is None:
        return code_zxx

    interface_language_code = lang_lex2int(language_code)
    # code_zxx is already markup, so format() inserts it without re-escaping.
    return flask.Markup(r'<span lang="{}" dir="{}">{} ({})</span>').format(
        lang_int2html(interface_language_code),
        text_direction(interface_language_code),
        language_name,
        code_zxx,
    )
Пример #8
0
def add_labels_to_lexeme_forms_grammatical_features(session, language, lexeme_forms):
    """Attach labels for the grammatical features of each lexeme form.

    Collects the item IDs of all grammatical features used by
    ``lexeme_forms``, fetches their labels in chunks of 50 IDs per
    ``wbgetentities`` request, and stores the list of label dicts on each
    form under ``'grammaticalFeatures_labels'`` (mutating the forms in
    place). Items without a label get a placeholder of the form
    ``{'language': 'zxx', 'value': <item ID>}``.

    :param session: API session whose ``get`` method accepts the request
        parameters as keyword arguments.
    :param language: language code; converted with ``lang_lex2int`` before
        it is sent to the API.
    :param lexeme_forms: iterable of form dicts with a
        ``'grammaticalFeatures'`` list of item IDs.
    """
    # Labels are requested for the converted code, so they must be looked up
    # under that same code; using the unconverted `language` as the key would
    # miss every label whenever the two codes differ.
    api_language = lang_lex2int(language)

    item_ids = set()
    for lexeme_form in lexeme_forms:
        item_ids.update(lexeme_form['grammaticalFeatures'])
    item_ids = list(item_ids)

    labels_map = {}  # item ID to label
    chunk_size = 50  # IDs sent per wbgetentities request
    for start in range(0, len(item_ids), chunk_size):
        chunk = item_ids[start:start + chunk_size]
        response = session.get(action='wbgetentities',
                               ids=chunk,
                               props=['labels'],
                               languages=[api_language],
                               languagefallback=1,  # TODO use True once mediawiki-utilities/python-mwapi#38 is in a released version
                               formatversion=2)
        for item_id, item in response['entities'].items():
            labels_map[item_id] = item['labels'].get(api_language, {'language': 'zxx', 'value': item_id})

    for lexeme_form in lexeme_forms:
        lexeme_form['grammaticalFeatures_labels'] = [
            labels_map[grammatical_feature_item_id]
            for grammatical_feature_item_id in lexeme_form['grammaticalFeatures']
        ]
Пример #9
0
def get_duplicates(wiki: str, language_code: str, lemma: str) -> list[Duplicate]:
    """Search ``wiki`` for existing lexemes that duplicate ``lemma``.

    A search result counts as a duplicate if its label equals ``lemma`` and
    its match language is either ``language_code`` or, for language codes
    longer than two characters, ``'und'``. Each duplicate carries the keys
    ``id``, ``uri``, ``label``, ``description``, ``forms_count`` and
    ``senses_count``; the two counts come from a second request for the
    page props of the matching lexemes.
    """
    session = anonymous_session(f'https://{wiki}.wikidata.org')
    api_language_code = lang_lex2int(language_code)

    search_response = session.get(
        action='wbsearchentities',
        search=lemma,
        language=api_language_code,
        uselang=api_language_code,  # for the result descriptions
        type='lexeme',
        limit=50,
    )

    def is_duplicate(result) -> bool:
        if result.get('label') != lemma:
            return False
        match_language = result['match']['language']
        if match_language == language_code:
            return True
        return len(language_code) > 2 and match_language == 'und'  # T230833

    # forms_count and senses_count are missing here and added below
    matches: dict[str, Duplicate] = {
        result['id']: cast(Duplicate, {
            'id': result['id'],
            'uri': result['concepturi'],
            'label': result['label'],
            'description': result['description'],
        })
        for result in search_response['search']
        if is_duplicate(result)
    }

    if matches:
        # no, this can’t be combined with the previous call by using
        # generator=wbsearch – then we don’t get the match language
        pages_response = session.get(
            action='query',
            titles=[f'Lexeme:{lexeme_id}' for lexeme_id in matches],
            prop=['pageprops'],
            ppprop=['wbl-forms', 'wbl-senses'],
        )
        prefix_length = len('Lexeme:')
        for page in pages_response['query']['pages'].values():
            lexeme_id = page['title'][prefix_length:]
            pageprops = page.get('pageprops', {})
            duplicate = matches[lexeme_id]
            duplicate['forms_count'] = pageprops.get('wbl-forms')
            duplicate['senses_count'] = pageprops.get('wbl-senses')

    # list() to turn the dict values view (not JSON serializable) into a plain list
    return list(matches.values())
Пример #10
0
def render_advanced_partial_forms_hint(language_code: str) -> RRV:
    """Render the advanced-mode partial forms hint for ``language_code``.

    The interface language is set from ``language_code`` before rendering.
    """
    interface_language_code = lang_lex2int(language_code)
    flask.g.interface_language_code = interface_language_code
    return flask.render_template('advanced_partial_forms_hint.html')
Пример #11
0
def render_no_duplicate(language_code: str) -> RRV:
    """Render the "no duplicate" snippet for ``language_code``.

    The interface language is set from ``language_code`` before rendering.
    """
    interface_language_code = lang_lex2int(language_code)
    flask.g.interface_language_code = interface_language_code
    return flask.render_template('no_duplicate.html')
Пример #12
0
def process_template_bulk(template_name: str) -> RRV:
    """Handle the bulk-mode page for a template.

    A POST that carries ``_bulk_mode``, passes the CSRF check and is not
    read-only is treated as a submission: the ``lexemes`` field is parsed,
    and each parsed lexeme is either reported as having duplicates or built
    and (with OAuth configured) submitted. The results are rendered with
    ``bulk-result.html`` when OAuth is configured and returned as JSON
    otherwise. Parse errors re-render ``bulk.html`` with the submitted text
    and an error message.

    Every other request renders the bulk input form. On POST it is prefilled
    either from the non-bulk form's ``form_representation`` fields or from a
    previous bulk input (which is flagged as a CSRF error).
    """
    # Unknown, redirected and ambiguous template names are handled up front.
    response = if_no_such_template_redirect(template_name)
    if response:
        return response

    template = templates_without_redirects[template_name]
    flask.g.interface_language_code = lang_lex2int(template['language_code'])

    # Read-only: OAuth is configured but the session has no access token.
    readonly = 'OAUTH' in app.config and 'oauth_access_token' not in flask.session

    # Read-only sessions skip this check and proceed to the form below.
    if not can_use_bulk_mode() and not readonly:
        return flask.render_template(
            'bulk-not-allowed.html',
        )

    if (flask.request.method == 'POST' and
            '_bulk_mode' in flask.request.form and
            csrf_token_matches(flask.request.form) and
            not readonly):

        form_data = flask.request.form
        parse_error = None
        show_optional_forms_hint = False
        # Turn each kind of parse failure into a message for the user.
        try:
            lexemes = parse_lexemes(form_data['lexemes'], template)
        except FirstFieldNotLexemeIdError as error:
            parse_error = message_with_kwargs(
                'bulk_first_field_not_lexeme_id',
                num_forms=error.num_forms,
                num_fields=error.num_fields,
                first_field=error.first_field,
                line_number=error.line_number,
            )
        except FirstFieldLexemeIdError as error:
            parse_error = message_with_kwargs(
                'bulk_first_field_lexeme_id',
                num_forms=error.num_forms,
                num_fields=error.num_fields,
                first_field=error.first_field,
                line_number=error.line_number,
            )
        except WrongNumberOfFieldsError as error:
            # The optional forms hint is only shown when too few fields were given.
            show_optional_forms_hint = error.num_fields < error.num_forms
            parse_error = message_with_kwargs(
                'bulk_wrong_number_of_fields',
                num_forms=error.num_forms,
                num_fields=error.num_fields,
                line_number=error.line_number,
            )
        except ValueError as error:
            # Any other ValueError is shown with its own message text.
            parse_error = str(error)
        if parse_error is not None:
            # Show the form again with the submitted text and the error.
            return flask.render_template(
                'bulk.html',
                template=template,
                value=form_data['lexemes'],
                parse_error=parse_error,
                show_optional_forms_hint=show_optional_forms_hint,
            )

        results = []  # type: list[dict]

        for lexeme in lexemes:
            # Duplicates are only looked for when the entry has no lexeme ID.
            if not lexeme.get('lexeme_id'):
                duplicates = find_duplicates(template, lexeme)
                if duplicates:
                    # Report the duplicates instead of building this lexeme.
                    results.append({
                        'duplicates': duplicates,
                        'form_representations': lexeme.getlist('form_representation'),
                    })
                    continue
            lexeme_data = build_lexeme(template, lexeme)
            # NOTE(review): the summary is built from the whole request form,
            # not from the individual `lexeme` -- confirm this is intended.
            summary = build_summary(template, form_data)

            if 'OAUTH' in app.config:
                lexeme_id, lexeme_uri = submit_lexeme(template, lexeme_data, summary)
                results.append({
                    'lexeme_data': lexeme_data,
                    'lexeme_id': lexeme_id,
                    'lexeme_uri': lexeme_uri,
                })
            else:
                # No OAuth configured: nothing is submitted, only recorded.
                print(summary)
                results.append({
                    'lexeme_data': lexeme_data,
                })

        if 'OAUTH' in app.config:
            return flask.render_template(
                'bulk-result.html',
                template=template,
                results=results,
            )
        else:
            return flask.jsonify(results)

    else:
        # Build the placeholder: the example placeholder of each form,
        # separated by '|', followed by a line containing '...'.
        placeholder = ''
        for form in template['forms']:
            if placeholder:
                placeholder += '|'
            # Only the middle part of the split example is used here.
            (prefix, form_placeholder, suffix) = split_example(form)
            placeholder += form_placeholder
        placeholder += '\n...'
        csrf_error = False

        if flask.request.method == 'POST':
            form_data = flask.request.form
            if 'form_representation' in form_data:
                # user came from non-bulk mode
                representations = form_data.getlist('form_representation')
                value = '|'.join(representations)
                # A value made up of separators only means every field was empty.
                if value == '|' * (len(representations) - 1):
                    # ...but had not typed anything into non-bulk mode yet,
                    # clear the value so that the placeholder is shown
                    value = ''
                else:
                    value += '\n'  # for convenience when adding more
            else:
                # user came from bulk mode with CSRF error
                value = form_data['lexemes']
                csrf_error = True
        else:
            value = None

        return flask.render_template(
            'bulk.html',
            template=template,
            placeholder=placeholder,
            value=value,
            csrf_error=csrf_error,
            show_optional_forms_hint=False,
            readonly=readonly,
        )