def test_db_session_always_fresh(self):
    """
    Checks that db.get_session and db.get_mw_session return a usable session
    even after a previously obtained session went through a failing commit.
    """
    s = db.get_session()
    try:
        r = ReportStore()
        s.add(r)
        s.commit()
        # presumably nulling the primary key makes the second commit fail,
        # which is the broken state this test wants to provoke
        r.id = None
        s.commit()
    except Exception:
        # the failure is deliberate; catching Exception instead of using a
        # bare except keeps KeyboardInterrupt / SystemExit from being swallowed
        pass

    # if the session is not cleaned up properly, this will throw an exception
    s = db.get_session()
    s.execute('select 1').fetchall()

    # same scenario against the mediawiki database
    # (mediawiki_project is not defined in this function)
    s = db.get_mw_session(mediawiki_project)
    try:
        u = MediawikiUser()
        s.add(u)
        s.commit()
        u.user_id = None
        s.commit()
    except Exception:
        pass

    # if the session is not cleaned up properly, this will throw an exception
    s = db.get_mw_session(mediawiki_project)
    s.execute('select 1').fetchall()
def run(self):
    """
    Runs self.metric for self.user_ids against the mediawiki database of
    self.project and returns whatever the metric returns.

    The session is closed whether or not the metric call raises.
    """
    mw_session = db.get_mw_session(self.project)
    try:
        return self.metric(self.user_ids, mw_session)
    finally:
        mw_session.close()
def validate_users(wikiusers, project, validate_as_user_ids):
    """
    Parameters
        wikiusers               : the wikiusers with a candidate mediawiki_username
        project                 : the project these wikiusers should belong to
        validate_as_user_ids    : if True, records will be checked against user_id
                                  if False, records are checked against user_name
    """
    users_dict = {wu.raw_id_or_name: wu for wu in wikiusers}
    valid_project = True

    # validate
    try:
        session = db.get_mw_session(project)
        if validate_as_user_ids:
            # non-numeric candidates can never match a user_id, so skip them
            keys_as_ints = [int(k) for k in users_dict if k.isdigit()]
            clause = MediawikiUser.user_id.in_(keys_as_ints)
        else:
            clause = MediawikiUser.user_name.in_(list(users_dict))
        matches = session.query(MediawikiUser).filter(clause).all()
        # no need to roll back session because it's just a query
    except OperationalError:
        # caused by accessing an unknown database
        # as it can be recovered, no need to reraise the error
        # but all users have to be marked as invalid
        msg = traceback.format_exc()
        task_logger.warning(msg)
        matches = []
        valid_project = False
    except Exception:
        msg = traceback.format_exc()
        task_logger.error(msg)
        # bare raise re-raises the active exception with its original traceback
        raise
def validate_users(wikiusers, project, validate_as_user_ids):
    """
    Parameters
        wikiusers               : the wikiusers with a candidate mediawiki_username
        project                 : the project these wikiusers should belong to
        validate_as_user_ids    : if True, records will be checked against user_id
                                  if False, records are checked against user_name
    """
    users_dict = {wu.raw_id_or_name: wu for wu in wikiusers}
    valid_project = True

    # validate
    try:
        session = db.get_mw_session(project)
        if validate_as_user_ids:
            # only purely numeric candidates can be compared against user_id
            keys_as_ints = [int(k) for k in users_dict if k.isdigit()]
            clause = MediawikiUser.user_id.in_(keys_as_ints)
        else:
            clause = MediawikiUser.user_name.in_(list(users_dict))
        matches = session.query(MediawikiUser).filter(clause).all()
        # no need to roll back session because it's just a query
    except OperationalError:
        # caused by accessing an unknown database
        # as it can be recovered, no need to reraise the error
        # but all users have to be marked as invalid
        task_logger.warning(traceback.format_exc())
        matches = []
        valid_project = False
    except Exception:
        task_logger.error(traceback.format_exc())
        # bare raise keeps the original traceback (raise e would reset it
        # under Python 2)
        raise
def setUp(self):
    #****************************************************************
    # set up and clean database (Warning: this DESTROYS ALL DATA)
    #****************************************************************
    self.session = db.get_session()

    # make sure the mediawiki tables exist in both test projects
    for mw_project in (mediawiki_project, second_mediawiki_project):
        db.MediawikiBase.metadata.create_all(
            db.get_mw_engine(mw_project),
            checkfirst=True
        )

    # same for the centralauth tables
    db.CentralAuthBase.metadata.create_all(db.get_ca_engine())

    # mediawiki_project is a global defined on this file
    self.mwSession = db.get_mw_session(mediawiki_project)
    self.mwSession2 = db.get_mw_session(second_mediawiki_project)
    self.caSession = db.get_ca_session()

    # reuse the teardown logic to start every test from a clean state
    DatabaseTest.tearDown(self)
def run(self):
    """
    Runs self.metric for self.user_ids against the mediawiki database of
    self.project.

    Returns
        a dictionary of metric results keyed by
        str(WikiUserKey(user key, project, cohort id)), or
        {NO_RESULTS: self.metric.default_result} if the metric returned nothing
    """
    session = db.get_mw_session(self.project)
    try:
        results_by_user = self.metric(self.user_ids, session)
    finally:
        # release the session even when the metric raises
        session.close()

    results = {
        str(WikiUserKey(key, self.project, self.cohort_id)): value
        for key, value in results_by_user.items()
    }
    if not results:
        results = {NO_RESULTS: self.metric.default_result}
    return results
def run(self):
    """
    Computes self.metric for self.user_ids on the mediawiki database of
    self.project and re-keys the output.

    Returns
        a dictionary whose keys are
        str(WikiUserKey(user key, project, cohort id)) and whose values are
        the per-user metric results; when the metric yields no rows,
        {NO_RESULTS: self.metric.default_result} is returned instead
    """
    session = db.get_mw_session(self.project)
    try:
        results_by_user = self.metric(self.user_ids, session)
    finally:
        # always give the session back, also on failure
        session.close()

    results = {
        str(WikiUserKey(key, self.project, self.cohort_id)): value
        for key, value in results_by_user.items()
    }
    if not results:
        results = {NO_RESULTS: self.metric.default_result}
    return results
def validate_users(wikiusers, project, validate_as_user_ids):
    """
    Marks each wikiuser as valid or invalid depending on whether a matching
    MediawikiUser record exists in the project's database.  The wikiusers
    are updated in place; nothing is returned.

    Parameters
        wikiusers               : the wikiusers with a candidate mediawiki_username
        project                 : the project these wikiusers should belong to
        validate_as_user_ids    : if True, records will be checked against user_id
                                  if False, records are checked against user_name

    Any exception is logged through task_logger and re-raised.
    """
    session = db.get_mw_session(project)
    users_dict = {wu.mediawiki_username: wu for wu in wikiusers}

    try:
        # validate
        if validate_as_user_ids:
            # non-numeric candidates can never match a user_id
            keys_as_ints = [int(k) for k in users_dict if k.isdigit()]
            clause = MediawikiUser.user_id.in_(keys_as_ints)
        else:
            clause = MediawikiUser.user_name.in_(list(users_dict))
        matches = session.query(MediawikiUser).filter(clause).all()

        # update results
        for match in matches:
            if validate_as_user_ids:
                key = str(match.user_id)
            else:
                key = match.user_name

            # remove valid matches, whatever is left afterwards is invalid
            wikiuser = users_dict.pop(key)
            wikiuser.mediawiki_username = match.user_name
            wikiuser.mediawiki_userid = match.user_id
            wikiuser.valid = True
            wikiuser.reason_invalid = None

        # mark the rest invalid
        # key is going to be a string if bindings are correct, but careful!
        # it might be a string with chars that cannot be represented w/ ascii
        # the 'reason_invalid' does not need to have the user_id,
        # it is on the record on the table
        if validate_as_user_ids:
            reason = "invalid user_id"
        else:
            reason = "invalid user_name"
        for wikiuser in users_dict.values():
            wikiuser.reason_invalid = reason
            wikiuser.valid = False
    except Exception:
        # format_exc returns the traceback text; print_exc returns None, so
        # using it here would log "None"
        msg = traceback.format_exc()
        task_logger.error(msg)
        # clear out the dictionary in case of an exception, and raise the exception
        users_dict.clear()
        # bare raise keeps the original traceback
        raise
def _is_mw_project_lagged(self, mw_project):
    """
    Determines whether the given wiki is considered lagged or not.

    A wiki counts as lagged when it has no revision timestamp at all, or
    when its newest revision timestamp is older than
    datetime.now() - self._lag_threshold.

    Parameters:
        mw_project: Name of the wiki to check.
    """
    mw_session = db.get_mw_session(mw_project)
    newest_revision_query = (
        mw_session.query(Revision.rev_timestamp)
        .order_by(Revision.rev_timestamp.desc())
        .limit(1)
    )
    latest = newest_revision_query.scalar()

    if latest is None:
        return True
    return latest < datetime.now() - self._lag_threshold
def validate_users(wikiusers, project, validate_as_user_ids):
    """
    Marks each wikiuser as valid or invalid depending on whether a matching
    MediawikiUser record exists in the project's database.  The wikiusers
    are updated in place; nothing is returned.

    Parameters
        wikiusers               : the wikiusers with a candidate mediawiki_username
        project                 : the project these wikiusers should belong to
        validate_as_user_ids    : if True, records will be checked against user_id
                                  if False, records are checked against user_name

    Any exception is re-raised to the caller.
    """
    session = db.get_mw_session(project)
    users_dict = {wu.mediawiki_username: wu for wu in wikiusers}

    try:
        # validate
        if validate_as_user_ids:
            # non-numeric candidates can never match a user_id
            keys_as_ints = [int(k) for k in users_dict if k.isdigit()]
            clause = MediawikiUser.user_id.in_(keys_as_ints)
        else:
            clause = MediawikiUser.user_name.in_(list(users_dict))
        matches = session.query(MediawikiUser).filter(clause).all()

        # update results
        for match in matches:
            if validate_as_user_ids:
                key = str(match.user_id)
            else:
                key = parse_username(match.user_name)

            # remove valid matches, whatever is left afterwards is invalid
            wikiuser = users_dict.pop(key)
            wikiuser.mediawiki_username = match.user_name
            wikiuser.mediawiki_userid = match.user_id
            wikiuser.valid = True
            wikiuser.reason_invalid = None

        # mark the rest invalid
        # NOTE(review): key comes from user input; formatting a non-ascii key
        # into a byte string may fail under Python 2 - confirm
        for key, wikiuser in users_dict.items():
            if validate_as_user_ids:
                wikiuser.reason_invalid = 'invalid user_id: {0}'.format(key)
            else:
                wikiuser.reason_invalid = 'invalid user_name: {0}'.format(key)
            wikiuser.valid = False
    except Exception:
        # clear out the dictionary in case of an exception, and raise the exception
        users_dict.clear()
        # bare raise keeps the original traceback
        raise
def setUp(self):
    #****************************************************************
    # set up for every test - delete and re-create all needed records
    #****************************************************************
    project = 'enwiki'
    self.session = db.get_session()
    engine = db.get_mw_engine(project)
    db.MediawikiBase.metadata.create_all(engine, checkfirst=True)
    self.mwSession = db.get_mw_session(project)
    DatabaseTest.tearDown(self)

    #****************************************************************
    # create records for enwiki tests
    #****************************************************************
    mw_user_dan = MediawikiUser(user_name='Dan')
    mw_user_evan = MediawikiUser(user_name='Evan')
    mw_user_andrew = MediawikiUser(user_name='Andrew')
    mw_user_diederik = MediawikiUser(user_name='Diederik')
    mw_logging = Logging(log_user_text='Reedy')
    mw_page = Page(page_namespace=0, page_title='Main_Page')
    mw_second_page = Page(page_namespace=209, page_title='Page in Namespace 209')
    self.mwSession.add_all([
        mw_user_dan,
        mw_user_evan,
        mw_user_andrew,
        mw_logging,
        mw_page,
        mw_second_page,
    ])
    self.mwSession.commit()
    self.createTestDataMetricPagesCreated(mw_user_evan)

    # edits in between Dan and Evan edits
    # NOTE(review): mw_user_diederik is not part of the add_all call above,
    # so presumably its user_id is still None here - confirm this is intended
    # date parts are written without leading zeros: a leading 0 makes
    # Python 2 read the literal as octal and is a syntax error in Python 3
    rev_before_1 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_diederik.user_id,
        rev_comment='before Dan edit 1',
        rev_len=4,
        rev_timestamp=datetime(2013, 5, 30),
    )
def generate_mediawiki_users(project, n, name_formatter):
    """
    Makes sure the project's mediawiki database holds at least n users whose
    user_name matches name_formatter('%'), inserting the missing ones.

    Parameters
        project         : the mediawiki project whose database is used
        n               : the number of matching users that should exist
        name_formatter  : callable building a user_name from an index; it is
                          also called with '%' to build the LIKE pattern
    """
    mw_session = db.get_mw_session(project)
    try:
        name_pattern = name_formatter('%')
        count_query = mw_session.query(func.count()).filter(
            MediawikiUser.user_name.like(name_pattern)
        )
        existing = count_query.one()[0]

        missing = n - existing
        if missing > 0:
            # new names continue right after the ones already present
            first_new = existing + 1
            new_rows = []
            for offset in range(missing):
                new_rows.append({
                    'user_name': name_formatter(first_new + offset),
                    'user_registration': '20130101000000',
                })
            mw_session.bind.engine.execute(
                MediawikiUser.__table__.insert(),
                new_rows
            )
            mw_session.commit()
    finally:
        mw_session.close()
def setUp(self):
    #****************************************************************
    # set up for every test - delete and re-create all needed records
    #****************************************************************
    project = 'enwiki'
    self.session = db.get_session()
    engine = db.get_mw_engine(project)
    db.MediawikiBase.metadata.create_all(engine, checkfirst=True)
    self.mwSession = db.get_mw_session(project)
    DatabaseTest.tearDown(self)

    #****************************************************************
    # create records for enwiki tests
    #****************************************************************
    mw_user_dan = MediawikiUser(user_name='Dan')
    mw_user_evan = MediawikiUser(user_name='Evan')
    mw_user_andrew = MediawikiUser(user_name='Andrew')
    mw_user_diederik = MediawikiUser(user_name='Diederik')
    mw_logging = Logging(log_user_text='Reedy')
    mw_page = Page(page_namespace=0, page_title='Main_Page')
    mw_second_page = Page(page_namespace=209, page_title='Page in Namespace 209')
    self.mwSession.add_all([
        mw_user_dan,
        mw_user_evan,
        mw_user_andrew,
        mw_logging,
        mw_page,
        mw_second_page,
    ])
    # committing here so the generated ids can be used by the records below
    self.mwSession.commit()
    self.createTestDataMetricPagesCreated(mw_user_evan)

    # edits in between Dan and Evan edits
    # NOTE(review): mw_user_diederik is not part of the add_all call above,
    # so presumably its user_id is still None here - confirm this is intended
    # NOTE: date parts are written without leading zeros on purpose: a
    # leading 0 makes Python 2 read the literal as octal (so 08 and 09 are
    # syntax errors) and Python 3 rejects such literals altogether
    rev_before_1 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_diederik.user_id,
        rev_comment='before Dan edit 1',
        rev_len=4,
        rev_timestamp=datetime(2013, 5, 30),
    )
    rev_before_2 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_diederik.user_id,
        rev_comment='before Dan edit 2',
        rev_len=0,
        rev_timestamp=datetime(2013, 6, 30),
    )
    rev_before_3 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_diederik.user_id,
        rev_comment='before Evan edit 1',
        rev_len=0,
        rev_timestamp=datetime(2013, 5, 30),
    )
    rev_before_4 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_diederik.user_id,
        rev_comment='before Evan edit 2',
        rev_len=100,
        rev_timestamp=datetime(2013, 6, 30),
    )
    rev_before_5 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_diederik.user_id,
        rev_comment='before Evan edit 3',
        rev_len=140,
        rev_timestamp=datetime(2013, 7, 23),
    )
    rev_alternate_namespace_1 = Revision(
        rev_page=mw_second_page.page_id,
        rev_user=mw_user_dan.user_id,
        rev_comment='first revision in namespace 209',
        rev_len=100,
        rev_timestamp=datetime(2013, 8, 5),
    )
    self.mwSession.add_all([
        rev_before_1,
        rev_before_2,
        rev_before_3,
        rev_before_4,
        rev_before_5,
        rev_alternate_namespace_1,
    ])
    # the rev_ids generated by this commit are the parents of the edits below
    self.mwSession.commit()

    # Dan edits
    rev1 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_dan.user_id,
        rev_comment='Dan edit 1',
        rev_parent_id=rev_before_1.rev_id,
        rev_len=0,
        rev_timestamp=datetime(2013, 6, 1),
    )
    rev2 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_dan.user_id,
        rev_comment='Dan edit 2',
        rev_parent_id=rev_before_2.rev_id,
        rev_len=10,
        rev_timestamp=datetime(2013, 7, 1),
    )
    # Evan edits
    rev3 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_evan.user_id,
        rev_comment='Evan edit 1',
        rev_parent_id=rev_before_3.rev_id,
        rev_len=100,
        rev_timestamp=datetime(2013, 6, 1),
    )
    rev4 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_evan.user_id,
        rev_comment='Evan edit 2',
        rev_parent_id=rev_before_4.rev_id,
        rev_len=140,
        rev_timestamp=datetime(2013, 7, 1),
    )
    rev5 = Revision(
        rev_page=mw_page.page_id,
        rev_user=mw_user_evan.user_id,
        rev_comment='Evan edit 3',
        rev_parent_id=rev_before_5.rev_id,
        rev_len=136,
        rev_timestamp=datetime(2013, 7, 24),
    )
    self.mwSession.add_all([rev1, rev2, rev3, rev4, rev5])
    self.mwSession.commit()

    #****************************************************************
    # create basic test records for non-mediawiki tests
    #****************************************************************
    dan_user = User(username='******')
    evan_user = User(username='******')
    web_test_user = User(email='*****@*****.**')

    # create a test cohort
    dan = WikiUser(
        mediawiki_username=mw_user_dan.user_name,
        mediawiki_userid=mw_user_dan.user_id,
        project=project
    )
    evan = WikiUser(
        mediawiki_username=mw_user_evan.user_name,
        mediawiki_userid=mw_user_evan.user_id,
        project=project
    )
    andrew = WikiUser(
        mediawiki_username=mw_user_andrew.user_name,
        mediawiki_userid=mw_user_andrew.user_id,
        project=project
    )
    diederik = WikiUser(
        mediawiki_username=mw_user_diederik.user_name,
        mediawiki_userid=mw_user_diederik.user_id,
        project=project
    )

    # create cohorts
    test_cohort = Cohort(name='test', enabled=True, public=False)
    private_cohort = Cohort(name='test_private', enabled=True, public=False)
    private_cohort2 = Cohort(name='test_private2', enabled=True, public=False)
    disabled_cohort = Cohort(name='test_disabled', enabled=False, public=False)
    self.session.add_all([
        #report,
        dan_user,
        evan_user,
        web_test_user,
        dan,
        evan,
        andrew,
        diederik,
        test_cohort,
        private_cohort,
        private_cohort2,
        disabled_cohort])
    self.session.commit()

    # create cohort membership
    dan_in_test = CohortWikiUser(wiki_user_id=dan.id, cohort_id=test_cohort.id)
    evan_in_test = CohortWikiUser(wiki_user_id=evan.id, cohort_id=test_cohort.id)
    andrew_in_test = CohortWikiUser(wiki_user_id=andrew.id, cohort_id=test_cohort.id)
    diederik_in_test = CohortWikiUser(
        wiki_user_id=diederik.id,
        cohort_id=test_cohort.id
    )
    self.session.add_all([
        dan_in_test,
        evan_in_test,
        andrew_in_test,
        diederik_in_test
    ])
    self.session.commit()

    # create cohort ownership
    dan_owns_test = CohortUser(
        user_id=dan_user.id,
        cohort_id=test_cohort.id,
        role=CohortUserRole.OWNER,
    )
    evan_owns_private = CohortUser(
        user_id=evan_user.id,
        cohort_id=private_cohort.id,
        role=CohortUserRole.OWNER,
    )
    evan_owns_private2 = CohortUser(
        user_id=evan_user.id,
        cohort_id=private_cohort2.id,
        role=CohortUserRole.OWNER,
    )
    web_user_owns_test = CohortUser(
        user_id=web_test_user.id,
        cohort_id=test_cohort.id,
        role=CohortUserRole.OWNER,
    )
    web_user_owns_private = CohortUser(
        user_id=web_test_user.id,
        cohort_id=private_cohort.id,
        role=CohortUserRole.OWNER,
    )
    web_user_owns_private2 = CohortUser(
        user_id=web_test_user.id,
        cohort_id=private_cohort2.id,
        role=CohortUserRole.OWNER,
    )
    dan_views_private2 = CohortUser(
        user_id=dan_user.id,
        cohort_id=private_cohort2.id,
        role=CohortUserRole.VIEWER
    )
    self.session.add_all([
        dan_owns_test,
        evan_owns_private,
        evan_owns_private2,
        web_user_owns_test,
        web_user_owns_private,
        web_user_owns_private2,
        dan_views_private2
    ])
    self.session.commit()

    # add reports
    report_created = PersistentReport(
        user_id=web_test_user.id,
        status=celery.states.PENDING,
        queue_result_key=None,
        show_in_ui=True
    )
    report_started = PersistentReport(
        user_id=web_test_user.id,
        status=celery.states.STARTED,
        queue_result_key=None,
        show_in_ui=True
    )
    report_started2 = PersistentReport(
        user_id=web_test_user.id,
        status=celery.states.STARTED,
        queue_result_key=None,
        show_in_ui=True
    )
    report_finished = PersistentReport(
        user_id=web_test_user.id,
        status=celery.states.SUCCESS,
        queue_result_key=None,
        show_in_ui=True
    )
    self.session.add_all([
        report_created,
        report_started,
        report_started2,
        report_finished
    ])
    self.session.commit()

    #****************************************************************
    # keep the test ids around so subclasses can use them
    #****************************************************************
    self.test_report_id = report_created.id
    self.test_user_id = dan_user.id
    self.test_web_user_id = web_test_user.id
    self.test_cohort_id = test_cohort.id
    self.test_cohort_name = test_cohort.name
    self.test_cohort_user_id = dan_owns_test.id
    self.test_wiki_user_id = dan.id
    self.test_cohort_wiki_user_id = dan_in_test.id
    self.test_logging_id = mw_logging.log_id
    self.test_mediawiki_user_id = mw_user_dan.user_id
    self.test_mediawiki_user_id_evan = mw_user_evan.user_id
    self.test_mediawiki_user_id_andrew = mw_user_andrew.user_id
    self.test_mediawiki_user_id_diederik = mw_user_diederik.user_id
    self.test_page_id = mw_page.page_id
    self.test_revision_id = rev1.rev_id
def setUp(self):
    # create basic test records for non-mediawiki tests
    self.session = db.get_session()
    project = 'enwiki'
    engine = db.get_mw_engine(project)
    db.MediawikiBase.metadata.create_all(engine, checkfirst=True)
    self.mwSession = db.get_mw_session(project)
    DatabaseTest.tearDown(self)

    dan_user = User(username='******')
    evan_user = User(username='******')
    web_test_user = User(email='*****@*****.**')

    # create a test cohort
    dan = WikiUser(mediawiki_username='******', mediawiki_userid=1, project='enwiki')
    evan = WikiUser(mediawiki_username='******', mediawiki_userid=2, project='enwiki')
    andrew = WikiUser(mediawiki_username='******', mediawiki_userid=3, project='enwiki')
    diederik = WikiUser(mediawiki_username='******', mediawiki_userid=4, project='enwiki')

    # create cohorts
    test_cohort = Cohort(name='test', enabled=True, public=True)
    private_cohort = Cohort(name='test_private', enabled=True, public=False)
    private_cohort2 = Cohort(name='test_private2', enabled=True, public=False)
    disabled_cohort = Cohort(name='test_disabled', enabled=False, public=True)
    self.session.add_all([
        #job,
        dan_user,
        evan_user,
        web_test_user,
        dan,
        evan,
        andrew,
        diederik,
        test_cohort,
        private_cohort,
        private_cohort2,
        disabled_cohort])
    self.session.commit()

    # create cohort membership
    dan_in_test = CohortWikiUser(wiki_user_id=dan.id, cohort_id=test_cohort.id)
    evan_in_test = CohortWikiUser(wiki_user_id=evan.id, cohort_id=test_cohort.id)
    andrew_in_test = CohortWikiUser(wiki_user_id=andrew.id, cohort_id=test_cohort.id)
    diederik_in_test = CohortWikiUser(
        wiki_user_id=diederik.id,
        cohort_id=test_cohort.id
    )
    self.session.add_all([
        dan_in_test,
        evan_in_test,
        andrew_in_test,
        diederik_in_test
    ])
    self.session.commit()

    # create cohort ownership
    dan_owns_test = CohortUser(
        user_id=dan_user.id,
        cohort_id=test_cohort.id,
        role=CohortUserRole.OWNER,
    )
    evan_owns_private = CohortUser(
        user_id=evan_user.id,
        cohort_id=private_cohort.id,
        role=CohortUserRole.OWNER,
    )
    evan_owns_private2 = CohortUser(
        user_id=evan_user.id,
        cohort_id=private_cohort2.id,
        role=CohortUserRole.OWNER,
    )
    web_user_owns_test = CohortUser(
        user_id=web_test_user.id,
        cohort_id=test_cohort.id,
        role=CohortUserRole.OWNER,
    )
    web_user_owns_private = CohortUser(
        user_id=web_test_user.id,
        cohort_id=private_cohort.id,
        role=CohortUserRole.OWNER,
    )
    web_user_owns_private2 = CohortUser(
        user_id=web_test_user.id,
        cohort_id=private_cohort2.id,
        role=CohortUserRole.OWNER,
    )
    dan_views_private2 = CohortUser(
        user_id=dan_user.id,
        cohort_id=private_cohort2.id,
        role=CohortUserRole.VIEWER
    )
    self.session.add_all([
        dan_owns_test,
        evan_owns_private,
        evan_owns_private2,
        web_user_owns_test,
        web_user_owns_private,
        web_user_owns_private2,
        dan_views_private2
    ])
    self.session.commit()

    # add jobs
    job_created = PersistentJob(
        user_id=web_test_user.id,
        status=celery.states.PENDING,
        result_key=None,
        show_in_ui=True
    )
    job_started = PersistentJob(
        user_id=web_test_user.id,
        status=celery.states.STARTED,
        result_key=None,
        show_in_ui=True
    )
    job_started2 = PersistentJob(
        user_id=web_test_user.id,
        status=celery.states.STARTED,
        result_key=None,
        show_in_ui=True
    )
    job_finished = PersistentJob(
        user_id=web_test_user.id,
        status=celery.states.SUCCESS,
        result_key=None,
        show_in_ui=True
    )
    self.session.add_all([
        job_created,
        job_started,
        job_started2,
        job_finished
    ])
    self.session.commit()

    # create records for enwiki tests
    # TODO: make this safe to execute in any environment
    self.mwSession.add(MediawikiUser(user_id=1, user_name='Dan'))
    self.mwSession.add(MediawikiUser(user_id=2, user_name='Evan'))
    self.mwSession.add(MediawikiUser(user_id=3, user_name='Andrew'))
    self.mwSession.add(Logging(log_id=1, log_user_text='Reedy'))
    self.mwSession.add(Page(page_id=1, page_namespace=0, page_title='Main_Page'))

    # edits in between Dan and Evan edits
    # (rev_user=4 has no MediawikiUser record created in this method)
    # date parts are written without leading zeros: a leading 0 makes
    # Python 2 read the literal as octal and is a syntax error in Python 3
    self.mwSession.add(Revision(
        rev_id=1,
        rev_page=1,
        rev_user=4,
        rev_comment='before Dan edit 1',
        rev_len=4,
        rev_timestamp=datetime(2013, 5, 30),
    ))
    self.mwSession.add(Revision(
        rev_id=3,
        rev_page=1,
        rev_user=4,
        rev_comment='before Dan edit 2',
        rev_len=0,
        rev_timestamp=datetime(2013, 6, 30),
    ))
    self.mwSession.add(Revision(
        rev_id=5,
        rev_page=1,
        rev_user=4,
        rev_comment='before Evan edit 1',
        rev_len=0,
        rev_timestamp=datetime(2013, 5, 30),
    ))
    self.mwSession.add(Revision(
        rev_id=7,
        rev_page=1,
        rev_user=4,
        rev_comment='before Evan edit 2',
        rev_len=100,
        rev_timestamp=datetime(2013, 6, 30),
    ))
    self.mwSession.add(Revision(
        rev_id=9,
        rev_page=1,
        rev_user=4,
        rev_comment='before Evan edit 3',
        rev_len=140,
        rev_timestamp=datetime(2013, 7, 23),
    ))

    # Dan edits
    self.mwSession.add(Revision(
        rev_id=2,
        rev_page=1,
        rev_user=1,
        rev_comment='Dan edit 1',
        rev_parent_id=1,
        rev_len=0,
        rev_timestamp=datetime(2013, 6, 1),
    ))
    self.mwSession.add(Revision(
        rev_id=4,
        rev_page=1,
        rev_user=1,
        rev_comment='Dan edit 2',
        rev_parent_id=3,
        rev_len=10,
        rev_timestamp=datetime(2013, 7, 1),
    ))

    # Evan edits
    self.mwSession.add(Revision(
        rev_id=6,
        rev_page=1,
        rev_user=2,
        rev_comment='Evan edit 1',
        rev_parent_id=5,
        rev_len=100,
        rev_timestamp=datetime(2013, 6, 1),
    ))
    self.mwSession.add(Revision(
        rev_id=8,
        rev_page=1,
        rev_user=2,
        rev_comment='Evan edit 2',
        rev_parent_id=7,
        rev_len=140,
        rev_timestamp=datetime(2013, 7, 1),
    ))
    self.mwSession.add(Revision(
        rev_id=10,
        rev_page=1,
        rev_user=2,
        rev_comment='Evan edit 3',
        rev_parent_id=9,
        rev_len=136,
        rev_timestamp=datetime(2013, 7, 24),
    ))
    self.mwSession.commit()
def run(self):
    """
    Runs self.metric for self.user_ids against the mediawiki database of
    self.project and returns whatever the metric returns.

    The session is closed once the metric call finishes, whether it
    succeeded or raised.
    """
    session = db.get_mw_session(self.project)
    try:
        return self.metric(self.user_ids, session)
    finally:
        session.close()
# ignore flake8 because of F403 violation # flake8: noqa ##################################### # Run this script with # ipython -i scripts/debug.py ##################################### from sqlalchemy import func from wikimetrics.models import Revision, Page, Cohort, CohortUser, User, MediawikiUser #from wikimetrics.models.mediawiki import from tests.fixtures import DatabaseTest from tests.test_metrics.test_survivors import * #from wikimetrics.metrics import Survivors from wikimetrics.configurables import db import calendar # Mediawiki database d = db.get_mw_session("enwiki") # Wikimetrics database m = db.get_session() s = SurvivalTest() #s.setUp() # %load_ext autoreload # %autoreload 2