infobox_templates = wikitext.revision.template_names_matching( r"infobox", name="enwiki.revision.infobox_templates") CN_TEMPLATES = [r"Citation[_ ]needed", r"Cn", r"Fact"] cn_templates = wikitext.revision.template_names_matching( "|".join(CN_TEMPLATES), name="enwiki.revision.cn_templates") who_templates = wikitext.revision.template_names_matching( "Who", name="enwiki.revision.who_templates") main_article_templates = wikitext.revision.template_names_matching( "Main", name="enwiki.main_article_templates") cite_templates = wikitext.revision.template_names_matching( r"cite", name="enwiki.revision.cite_templates") proportion_of_templated_references = \ cite_templates / max(wikitext.revision.ref_tags, 1) non_templated_references = max(wikitext.revision.ref_tags - cite_templates, 0) non_cite_templates = sub(wikitext.revision.templates, cite_templates, name="enwiki.revision.non_cite_templates") # Links category_links = wikitext.revision.wikilink_titles_matching( r"Category\:", name="enwiki.revision.category_links") image_links = wikitext.revision.wikilink_titles_matching( r"File|Image\:", name="enwiki.revision.image_links") # References revision = Revision( "enwiki.revision.revision", wikitext.revision.datasources, ) paragraphs = mappers.map(str, revision.paragraphs_sentences_and_whitespace,
r"Harvcolnb", r"Harvard citations", r"harvs", r"Harvp", r"Citation" ] cite_templates = wikitext.revision.template_names_matching( "|".join(CITE_TEMPLATES), name="ukwiki.revision.cite_templates") shortened_footnote_templates = wikitext.revision.template_names_matching( "sfn", name="ukwiki.revision.shortened_footnote_templates") all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags all_cite_templates = cite_templates + shortened_footnote_templates proportion_of_templates_references = \ all_cite_templates / max(all_ref_tags, 1) non_templated_references = max(all_ref_tags - all_cite_templates, 0) non_cite_templates = sub( wikitext.revision.templates, all_cite_templates, name="ukwiki.revision.non_cite_templates" ) # Links CATEGORY_LINKS = [ r"Категорія", r"Category", r"Категория" ] category_links = wikitext.revision.wikilink_titles_matching( "|".join(CATEGORY_LINKS), name="ukwiki.revision.category_links") IMAGE_LINKS = [ r"File", r"Файл", r"Image",
+++++++++++++++++
"""
from revscoring.features import wikitext
from revscoring.features.modifiers import log, max, sub

from . import wikipedia

# Feature over template names matching the Turkish citation patterns.
cite_templates = wikitext.revision.template_names_matching(
    r"Kaynak|.*[ _]kaynağı", name="trwiki.revision.cite_templates")
# max(..., 1) keeps the denominator at 1 or more when there are no <ref> tags.
proportion_of_templated_references = \
    cite_templates / max(wikitext.revision.ref_tags, 1)
# Clamped at 0 so the difference is never negative.
non_templated_references = max(wikitext.revision.ref_tags - cite_templates, 0)
non_cite_templates = sub(
    wikitext.revision.templates, cite_templates,
    name="trwiki.revision.non_cite_templates"
)
infobox_templates = wikitext.revision.template_names_matching(
    r".*[ _]bilgi[ _]kutusu", name="trwiki.revision.infobox_templates")

# Copied (2015-10-29) from:
# https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Citez_vos_sources#R.C3.A9clamation_et_contestation_de_sources
# NOTE(review): the URL above points at French Wikipedia while the feature
# below is named for trwiki -- this comment may be a leftover from another
# wiki's module; confirm the actual source of these patterns.
cn_templates = wikitext.revision.template_names_matching(
    r"Kaynak[ _]belirt|Olgu|Fact|Delil",
    name="trwiki.revision.lvl1_cn_templates")
main_article_templates = wikitext.revision.template_names_matching(
    r"Ana|Anamadde",
    name="trwiki.main_article_templates")
from revscoring.features import revision_oriented, wikitext as wikitext_features
from revscoring.features.modifiers import sub
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# Features whose names are prefixed "enwiki.": two edit-comment regex matches
# and the change in "^cite" template-name matches between the revision and
# its parent.
local_wiki = [
    revision_oriented.revision.comment_matches(
        r"copy|edit|npov|wp:?el",
        name="enwiki.revision.comment.has_known_word"),
    revision_oriented.revision.comment_matches(
        r"\[\[WP:AES\|←\]\]",
        name="enwiki.revision.comment.is_aes"),
    # revision value minus parent value
    sub(wikitext_features.revision.template_names_matching(r"^cite"),
        wikitext_features.revision.parent.template_names_matching(r"^cite"),
        name="enwiki.revision.diff.cite_templates_added")
]

# Diff-level match features taken from the English `badwords` set.
badwords = [
    english.badwords.revision.diff.match_delta_sum,
    english.badwords.revision.diff.match_delta_increase,
    english.badwords.revision.diff.match_delta_decrease,
    english.badwords.revision.diff.match_prop_delta_sum,
    english.badwords.revision.diff.match_prop_delta_increase,
    english.badwords.revision.diff.match_prop_delta_decrease
]

# Diff-level match features taken from the English `informals` set.
informals = [
    english.informals.revision.diff.match_delta_sum,
    english.informals.revision.diff.match_delta_increase,
    english.informals.revision.diff.match_delta_decrease,
    english.informals.revision.diff.match_prop_delta_sum,
wikitext.revision.diff.markup_prop_delta_increase,
    wikitext.revision.diff.markup_prop_delta_decrease,
    wikitext.revision.diff.number_delta_sum,
    wikitext.revision.diff.number_delta_increase,
    wikitext.revision.diff.number_delta_decrease,
    wikitext.revision.diff.number_prop_delta_sum,
    wikitext.revision.diff.number_prop_delta_increase,
    wikitext.revision.diff.number_prop_delta_decrease,
    wikitext.revision.diff.uppercase_word_delta_sum,
    wikitext.revision.diff.uppercase_word_delta_increase,
    wikitext.revision.diff.uppercase_word_delta_decrease,
    wikitext.revision.diff.uppercase_word_prop_delta_sum,
    wikitext.revision.diff.uppercase_word_prop_delta_increase,
    wikitext.revision.diff.uppercase_word_prop_delta_decrease,
    # Each entry below is the revision's value minus its parent's value for
    # one wikitext measure; the name identifies which measure changed.
    sub(wikitext.revision.chars, wikitext.revision.parent.chars,
        name="revision.diff.chars_change"),
    sub(wikitext.revision.tokens, wikitext.revision.parent.tokens,
        name="revision.diff.tokens_change"),
    sub(wikitext.revision.words, wikitext.revision.parent.words,
        name="revision.diff.words_change"),
    sub(wikitext.revision.markups, wikitext.revision.parent.markups,
        name="revision.diff.markups_change"),
    sub(wikitext.revision.headings, wikitext.revision.parent.headings,
        name="revision.diff.headings_change"),
    sub(wikitext.revision.external_links,
        wikitext.revision.parent.external_links,
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# Features whose names are prefixed "enwiki.": two edit-comment regex matches
# and the change in "^cite" template-name matches between the revision and
# its parent.
local_wiki = [
    revision_oriented.revision.comment_matches(
        r"copy|edit|npov|wp:?el",
        name="enwiki.revision.comment.has_known_word"
    ),
    revision_oriented.revision.comment_matches(
        r"\[\[WP:AES\|←\]\]",
        name="enwiki.revision.comment.is_aes"
    ),
    # revision value minus parent value
    sub(
        wikitext_features.revision.template_names_matching(r"^cite"),
        wikitext_features.revision.parent.template_names_matching(r"^cite"),
        name="enwiki.revision.diff.cite_templates_added"
    )
]

# Diff-level match features taken from the English `badwords` set.
badwords = [
    english.badwords.revision.diff.match_delta_sum,
    english.badwords.revision.diff.match_delta_increase,
    english.badwords.revision.diff.match_delta_decrease,
    english.badwords.revision.diff.match_prop_delta_sum,
    english.badwords.revision.diff.match_prop_delta_increase,
    english.badwords.revision.diff.match_prop_delta_decrease
]

# Diff-level match features taken from the English `informals` set.
informals = [
    english.informals.revision.diff.match_delta_sum,
from revscoring.features import wikitext as wikitext_features
from revscoring.features import revision_oriented
from revscoring.languages.features import RegexMatches
from revscoring.features.modifiers import sub
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# Features whose names are prefixed "fandom.": edit-comment regex matches plus
# revision-minus-parent differences for "delet"-prefixed templates and
# deletion/candidate category links.
local_wiki = [
    revision_oriented.revision.comment_matches(
        r"^delet", name="fandom.revision.comment.delete_request"),
    # revision value minus parent value
    sub(wikitext_features.revision.template_names_matching(r"^delet"),
        wikitext_features.revision.parent.template_names_matching(r"^delet"),
        name="fandom.revision.diff.delete_added"),
    # revision value minus parent value
    sub(wikitext_features.revision.wikilink_titles_matching(
            r"^category:(delet|candidat)"),
        wikitext_features.revision.parent.wikilink_titles_matching(
            r"^category:(delet|candidat)"),
        name="fandom.revision.diff.delete_category_added"),
    revision_oriented.revision.comment_matches(
        r"^redirected page to",
        name="fandom.revision.comment.likely_redirect")
]

# Redirect page
redirect_regex = r"redirect"
redirects = RegexMatches("fandom.likely_redirect", [redirect_regex])

# Diff-level match features taken from the English `badwords` set.
badwords = [
    english.badwords.revision.diff.match_delta_sum,
    english.badwords.revision.diff.match_delta_increase,
    english.badwords.revision.diff.match_delta_decrease,
wikitext.revision.diff.markup_prop_delta_increase, wikitext.revision.diff.markup_prop_delta_decrease, wikitext.revision.diff.number_delta_sum, wikitext.revision.diff.number_delta_increase, wikitext.revision.diff.number_delta_decrease, wikitext.revision.diff.number_prop_delta_sum, wikitext.revision.diff.number_prop_delta_increase, wikitext.revision.diff.number_prop_delta_decrease, wikitext.revision.diff.uppercase_word_delta_sum, wikitext.revision.diff.uppercase_word_delta_increase, wikitext.revision.diff.uppercase_word_delta_decrease, wikitext.revision.diff.uppercase_word_prop_delta_sum, wikitext.revision.diff.uppercase_word_prop_delta_increase, wikitext.revision.diff.uppercase_word_prop_delta_decrease, sub(wikitext.revision.chars, wikitext.revision.parent.chars, name="revision.diff.chars_change"), sub(wikitext.revision.tokens, wikitext.revision.parent.tokens, name="revision.diff.tokens_change"), sub(wikitext.revision.words, wikitext.revision.parent.words, name="revision.diff.words_change"), sub(wikitext.revision.markups, wikitext.revision.parent.markups, name="revision.diff.words_change"), sub(wikitext.revision.headings, wikitext.revision.parent.headings, name="revision.diff.headings_change"), sub(wikitext.revision.external_links, wikitext.revision.parent.external_links,
# Float feature produced by `get_positive_score` from `sentiment_score`.
positive_polarity = Feature(
    "english.sentiment.revision.positive_polarity",
    get_positive_score,
    depends_on=[sentiment_score],
    returns=float
)

# Float feature produced by `get_negative_score` from `sentiment_score`.
negative_polarity = Feature(
    "english.sentiment.revision.negative_polarity",
    get_negative_score,
    depends_on=[sentiment_score],
    returns=float
)

# Positive polarity minus negative polarity.
diff_polarity = sub(positive_polarity, negative_polarity,
                    name="english.sentiment.revision.diff_polarity")

# Character-level wikitext features of the revision, followed by derived
# ratios.
char_based = [
    wikitext.revision.chars,
    wikitext.revision.whitespace_chars,
    wikitext.revision.markup_chars,
    wikitext.revision.cjk_chars,
    wikitext.revision.entity_chars,
    wikitext.revision.url_chars,
    wikitext.revision.word_chars,
    wikitext.revision.uppercase_word_chars,
    wikitext.revision.punctuation_chars,
    wikitext.revision.break_chars,
    wikitext.revision.longest_repeated_char,
    # max(..., 1) keeps the denominator at 1 or more
    wikitext.revision.whitespace_chars / max(wikitext.revision.chars, 1),
"`float` : A ratio of important translations descriptions in the revision" all_sources_datasource = Datasource(name + ".all_sources", _process_all_sources, depends_on=[item]) all_sources = aggregators.len(all_sources_datasource) "`int` : A count of all sources in the revision" all_wikimedia_sources_datasource = Datasource( name + ".all_wikimedia_sources", _process_wikimedia_sources, depends_on=[all_sources_datasource]) all_wikimedia_sources = aggregators.len(all_wikimedia_sources_datasource) "`int` : A count of all sources which come from Wikimedia projects in the revision" all_external_sources = modifiers.sub(all_sources, all_wikimedia_sources) "A count of all sources which do not come from Wikimedia projects in the revision" external_sources_ratio = all_external_sources / modifiers.max( wikibase_features.revision.sources, 1) "A ratio/division between number of external references and number of claims that have references in the revision" unique_sources = Feature(name + ".unique_sources", _process_unique_sources, depends_on=[all_sources_datasource], returns=int) "`int` : A count of unique sources in the revision" # Status is_human = revision.has_property_value(properties.INSTANCE_OF, items.HUMAN,