def load(self):
    """Load the goodfaith model and wire up an English Wikipedia extractor.

    Sets ``self.model`` and ``self.extractor``, then flips ``self.ready``
    once both are available.
    """
    model_path = "enwiki.goodfaith.gradient_boosting.model"
    with open(model_path) as model_file:
        self.model = Model.load(model_file)
    session = mwapi.Session(
        "https://en.wikipedia.org",
        user_agent="KFServing revscoring demo")
    self.extractor = api.Extractor(session)
    self.ready = True
def extract_features(label_file, context):
    """Extract damaging/goodfaith feature values for every labeled revision.

    Parameters
    ----------
    label_file : str
        Path to a file of newline-delimited JSON revision labels.
    context : str
        Wiki database name such as "enwiki"; the "wiki" suffix is stripped
        to build the API host (e.g. "enwiki" -> "https://en.wikipedia.org").

    Returns
    -------
    Whatever ``extract`` produces for the configured dependents
    (feature values per revision).
    """
    rev_ids = [json.loads(label) for label in load_labels(label_file)]

    session = mwapi.Session(
        host="https://{0}.wikipedia.org".format(context.replace("wiki", "")),
        user_agent="Ores bias analysis project by Nate TeBlunthuis <*****@*****.**>")

    dependent_names = [
        "editquality.feature_lists.{0}.damaging".format(context),
        "editquality.feature_lists.{0}.goodfaith".format(context)]

    # Each import path may resolve to a single Dependent or a list of them.
    dependents = []
    for dependent_path in dependent_names:
        dependent_or_list = yamlconf.import_path(dependent_path)
        if isinstance(dependent_or_list, Dependent):
            dependents.append(dependent_or_list)
        else:
            dependents.extend(dependent_or_list)

    extractor = api.Extractor(session)
    # os.cpu_count() may return None (undeterminable), and subtracting one on
    # a single-core host would request zero workers -- clamp to at least one.
    worker_count = max(1, (os.cpu_count() or 2) - 1)
    features = extract(dependents, rev_ids, extractor, extractors=worker_count)
    return features
def main():
    """Entry point: score revision ids read from stdin with a revscoring model.

    Reads TSV rows from stdin, loads the model named on the command line,
    and hands everything to ``run``.
    """
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # requests is chatty at INFO; keep only warnings and above.
    logging.getLogger('requests').setLevel(logging.WARNING)

    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    # Close the model file deterministically instead of leaking the handle.
    with open(args['<model-file>']) as model_file:
        scorer_model = MLScorerModel.load(model_file)

    session = mwapi.Session(
        args['--host'],
        user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    score_processor = ScoreProcessor(scorer_model, extractor)

    cache = json.loads(args['--cache'] or "{}")
    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)
def get_and_store_features(diff, url):
    """Extract damaging features for one diff and append them to a TSV file.

    Best-effort: failures are reported and swallowed so a batch run can
    keep going over the remaining diffs.

    Parameters
    ----------
    diff : str
        Revision id (as a string) to extract features for.
    url : str
        Base wiki URL, e.g. "https://en.wikipedia.org".
    """
    try:
        print(diff, url)
        session = mwapi.Session(
            url,
            api_path='/api.php',
            user_agent='Cynthia - Vandalism detection bot, @noreplyz')
        api_extractor = api.Extractor(session)
        # Extract first so a failed extraction never opens/creates the file.
        features = [str(value)
                    for value in api_extractor.extract(int(diff), damaging)]
        with open('20k-features-damaging-2.tsv', 'a') as f:
            f.write(url + '/wiki/?diff=' + diff + '\t'
                    + '\t'.join(features) + '\n')
    except RevisionNotFound:
        print('Revision not found.')
        return
    except Exception as error:
        # Surface what actually went wrong instead of only a generic message.
        print('Wiki closed, or other issue:', error)
        return
english.badwords.revision.diff.match_prop_delta_sum, # Measures the proportional change in "informals" english.informals.revision.diff.match_prop_delta_sum, # Measures the proportional change meaningful words english.stopwords.revision.diff.non_stopword_prop_delta_sum, # Is the user anonymous revision_oriented.revision.user.is_anon, # Is the user a bot or a sysop revision_oriented.revision.user.in_group({'bot', 'sysop'}), # How long ago did the user register? temporal.revision.user.seconds_since_registration ] trainingRevId = [] testRevId = [] api_extractor = api.Extractor(session) """ sample = [] with open('datasample.csv') as csv_file: data_csv_reader = csv.reader(csv_file, delimiter=',') for row in data_csv_reader: if row != []: sample.append(row[1]) sampleData = [] sampleInfo = [] for revid in sample: revid = int(revid) try: #print("https://en.wikipedia.org/wiki/?diff={0}".format(revid)) sampleRevData = list(api_extractor.extract(revid, features))
import mwapi from revscoring import ScorerModel from revscoring.extractors import api with open("models/enwiki.damaging.linear_svc.model") as f: model = ScorerModel.load(f) extractor = api.Extractor( mwapi.Session(host="https://en.wikipedia.org", user_agent="revscoring demo")) values = extractor.extract(123456789, model.features) print(model.score(values))
import bz2

import mwapi
from revscoring import Model
from revscoring.extractors import api

# Load the bz2-compressed draft-quality model; use a context manager so the
# compressed file handle is closed after loading instead of being leaked.
with bz2.open("models/ptwiki.draft_quality.gradient_boosting.model.bz2",
              "rb") as model_file:
    model = Model.load(model_file)

extractor = api.Extractor(
    mwapi.Session(host="https://pt.wikipedia.org",
                  user_agent="draftquality test"))

# Score one ptwiki revision with the loaded model's feature set.
values = extractor.extract(58071111, model.features)
print(model.score(values))
def get_extractor(lang="de"):
    """Build a revscoring API extractor for the given Wikipedia language.

    Parameters
    ----------
    lang : str
        Language subdomain, e.g. "de" for de.wikipedia.org.
    """
    host = "https://%s.wikipedia.org" % lang
    session = mwapi.Session(host, user_agent=USER_AGENT)
    return api.Extractor(session)
def get_features(self, wiki, diff):
    """Extract the damaging feature vector for one revision id on *wiki*.

    Uses the configured user agent from ``self.config``.
    """
    mw_session = mwapi.Session(
        wiki,
        api_path='/api.php',
        user_agent=self.config['user_agent'])
    extractor = api.Extractor(mw_session)
    feature_values = extractor.extract(int(diff), damaging)
    return list(feature_values)
def get_features(wiki, diff, featurelist):
    """Return extracted feature values for *diff* on https://<wiki>.

    Parameters
    ----------
    wiki : str
        Hostname (without scheme), e.g. "en.wikipedia.org".
    diff : str or int
        Revision id to extract features for.
    featurelist
        Iterable of revscoring features to extract.
    """
    host = 'https://' + wiki
    mw_session = mwapi.Session(
        host, api_path='/api.php', user_agent='Cynthia testing')
    extractor = api.Extractor(mw_session)
    return list(extractor.extract(int(diff), featurelist))