Пример #1
0
    def __init__(self, config_file):
        """Load the experiment configuration and prepare connections and caches.

        Args:
            config_file: File name of a YAML configuration file, looked up in
                the ``config`` directory two levels above this module.

        Raises:
            OSError: If either YAML file cannot be opened.
            KeyError: If ``langs``, ``experiment_start_date`` or
                ``observation_back_days`` is missing from the configuration.
        """
        config_dir = os.path.join(Path(__file__).parent.parent, 'config')

        # Use a context manager so the file handle is closed right after
        # parsing instead of being left for the garbage collector.
        with open(os.path.join(config_dir, config_file), 'r') as config_fh:
            config = yaml.safe_load(config_fh)
        self.config = config
        self.langs = config['langs']
        self.experiment_start_date = config['experiment_start_date']
        self.observation_back_days = config['observation_back_days']
        # The first positional argument of timedelta is a number of days.
        self.observation_start_date = self.experiment_start_date - datetime.timedelta(
            self.observation_back_days)
        # One MediaWiki API session per configured language.
        self.mwapi_sessions = {
            lang: self.make_mwapi_session(lang)
            for lang in self.langs
        }
        self.wmf_con = make_wmf_con()
        self.wmf_db = {}
        self.wmf_db_hits = 0
        # Containers that start empty; they are not filled in this method.
        self.thankers = {}
        self.surveys = {}
        self.merged = {}
        self.merged_no_survey = {}
        self.analysis = {}
        self.superthankers = {}
        self.db_engine = init_engine()
        self.db_session = init_session()

        # The file name (including its spelling) is part of the on-disk
        # layout and must not be altered here.
        with open(
                os.path.join(config_dir,
                             "qualtrics_to_interal_field_map.yaml"),
                'r') as map_fh:
            self.qualtrics_map = yaml.safe_load(map_fh)
Пример #2
0
 def __init__(self, thank_batch_size=1, lang=None):
     """Configure the thank sender from arguments and environment variables.

     Args:
         thank_batch_size: Batch size used when the environment variable
             ``CS_WIKIPEDIA_OAUTH_BATCH_SIZE`` is not set.
         lang: Language stored on the instance as ``self.lang``.

     Raises:
         KeyError: If ``CS_OAUTH_CONSUMER_KEY``, ``CS_OAUTH_CONSUMER_SECRET``
             or ``CS_OAUTH_THANKS_MAX_SEND_ERRORS`` is not set.
         ValueError: If an environment-provided number is not an integer.
     """
     batch_size = os.getenv('CS_WIKIPEDIA_OAUTH_BATCH_SIZE',
                            thank_batch_size)
     # os.getenv returns a str whenever the variable is set, so without
     # conversion the attribute would be a str or an int depending on the
     # environment. Only the environment value is converted; a value passed
     # by the caller is stored untouched.
     if isinstance(batch_size, str):
         batch_size = int(batch_size)
     self.thank_batch_size = batch_size
     logging.info(f"Thanking batch size set to : {self.thank_batch_size}")
     self.db_session = init_session()
     self.lang = lang
     logging.info(f"Thanking language set to. {self.lang}")
     self.consumer_token = mwoauth.ConsumerToken(
         os.environ['CS_OAUTH_CONSUMER_KEY'],
         os.environ['CS_OAUTH_CONSUMER_SECRET'])
     self.max_send_errors = int(
         os.environ['CS_OAUTH_THANKS_MAX_SEND_ERRORS'])
def add_num_quality_user(user_id, lang, namespace_fn_name, num_quality_revisions_replacement=None):
    """Compute a user's quality-revision count and store it on their candidate row.

    Args:
        user_id: Id of the user, matched against ``candidates.user_id``.
        lang: Language, matched against ``candidates.lang``.
        namespace_fn_name: Name passed to ``get_namespace_fn`` to obtain the
            namespace function handed to the counting routine.
        num_quality_revisions_replacement: Optional callable used in place of
            ``num_quality_revisions``; it is called with the same keyword
            arguments.
    """
    db_session = init_session()
    wmf_con = make_wmf_con()
    namespace_fn = get_namespace_fn(namespace_fn_name)

    # Pick the counting routine: the injected replacement when given,
    # otherwise the module-level default.
    if num_quality_revisions_replacement is None:
        count_quality_revisions = num_quality_revisions
    else:
        count_quality_revisions = num_quality_revisions_replacement

    quality_count = count_quality_revisions(
        user_id=user_id,
        lang=lang,
        wmf_con=wmf_con,
        namespace_fn=namespace_fn,
    )

    candidate_query = db_session.query(candidates)
    candidate_query = candidate_query.filter(candidates.lang == lang)
    candidate_query = candidate_query.filter(candidates.user_id == user_id)
    # NOTE(review): presumably a SQLAlchemy query, in which case first()
    # yields None when nothing matches and the assignment below raises
    # AttributeError - confirm whether callers guarantee the row exists.
    candidate = candidate_query.first()

    candidate.user_editcount_quality = quality_count
    db_session.add(candidate)
    db_session.commit()
 def __init__(self, config_file, fn):
     """Load the YAML configuration and initialise session and bookkeeping state.

     Args:
         config_file: File name of a YAML configuration file, looked up in
             the ``config`` directory two levels above this module.
         fn: Stored unchanged as ``self.fn``.

     Raises:
         OSError: If the configuration file cannot be opened.
     """
     config_path = os.path.join(
         Path(__file__).parent.parent, 'config', config_file)
     # Use a context manager so the file handle is closed right after
     # parsing instead of being left for the garbage collector.
     with open(config_path, 'r') as config_fh:
         config = yaml.safe_load(config_fh)
     self.fn = fn
     self.config = config
     self.db_session = init_session()
     # Snapshot taken at construction time; needs self.db_session to exist
     # first if num_experiment_things() queries it (TODO confirm).
     self.inital_num_experiment_things = self.num_experiment_things()
     self.df = None
     self.ets_to_add = []
Пример #5
0
 def __init__(self, config=None):
     """Read the extra config file and derive report settings from it.

     The configuration is always read from the file named by the
     ``CS_EXTRA_CONFIG_FILE`` environment variable; the ``config`` argument
     is accepted but not read by this method.

     ``self.experiment_id`` is only assigned when the configuration has a
     ``name`` entry.
     """
     extra_config_path = os.environ['CS_EXTRA_CONFIG_FILE']
     self.config = read_config_file(extra_config_path, __file__)
     self.db_engine = init_engine()
     self.db_session = init_session()

     if 'name' in self.config.keys():
         experiment_name = self.config['name']
         self.experiment_id = _get_experiment_id(
             self.db_session, experiment_name, return_id=True)

     # The CSV output directory is <project dir>/<reports dir>.
     dirs_cfg = self.config["dirs"]
     self.csv_dir = os.path.join(dirs_cfg['project'], dirs_cfg['reports'])

     # Today's date as YYYYMMDD.
     today = datetime.datetime.today()
     self.date = today.strftime('%Y%m%d')
     self.queries = {}

     reports_cfg = self.config['reports']
     self.to_addrs = reports_cfg['to_addrs']
     self.from_addr = reports_cfg['from_addr']
     self.subject_stat = None
    def __init__(self,
                 config_file,
                 get_active_users_replacement=None,
                 db_session_replacement=None):
        """Load the onboarding configuration and set up connections and queues.

        Args:
            config_file: File name of a YAML configuration file, looked up in
                the ``config`` directory two levels above this module.
            get_active_users_replacement: Optional callable used instead of
                the module-level ``get_active_users``.
            db_session_replacement: Optional session used instead of a new
                one from ``init_session()``.

        Raises:
            OSError: If the configuration file cannot be opened.
            KeyError: If a required configuration key is missing.
        """
        config_path = os.path.join(
            Path(__file__).parent.parent, 'config', config_file)
        # Use a context manager so the file handle is closed right after
        # parsing instead of being left for the garbage collector.
        with open(config_path, 'r') as config_fh:
            config = yaml.safe_load(config_fh)
        self.config = config
        self.langs = config['langs']
        self.min_edit_count = config['min_edit_count']
        self.wmf_con = make_wmf_con()
        self.db_session = init_session(
        ) if not db_session_replacement else db_session_replacement
        self.experiment_start_date = config['experiment_start_date']
        # Activity window: from observation_back_days before the experiment
        # start up to the moment this object is constructed.
        self.onboarding_earliest_active_date = self.experiment_start_date - timedelta(
            days=config['observation_back_days'])
        self.onboarding_latest_active_date = datetime.utcnow()
        self.populations = defaultdict(dict)
        self.namespace_fn = get_namespace_fn(config['namespace_fn'])
        self.get_active_users_replacement = get_active_users_replacement
        self.get_active_users = get_active_users if not get_active_users_replacement else get_active_users_replacement

        # Optional setting; None when the key is absent from the config.
        self.max_onboarders_to_check = self.config.get(
            'max_onboarders_to_check')

        # Per-language lists of user ids; all start empty here.
        self.users_in_thanker_experiment = {
            "ar": [],
            "de": [],
            "fa": [],
            "pl": [],
            "en": []
        }

        self.q = Queue(name='onboarder_thankee', connection=Redis())
        self.failed_q = Queue(name='failed', connection=Redis())
 def __init__(self,
              lang=None,
              enable_create_actions=True,
              enable_execute_actions=True):
     """Configure the survey sender from the extra config file and environment.

     Args:
         lang: Language used when ``CS_WIKIPEDIA_LANG`` is not set.
         enable_create_actions: Stored as ``self.enable_create_actions``.
         enable_execute_actions: Stored as ``self.enable_execute_actions``.
     """
     extra_config_path = os.environ['CS_EXTRA_CONFIG_FILE']
     self.config = read_config_file(extra_config_path, __file__)
     # TODO: consider resolving the config relative to os.path.abspath('')
     # instead of __file__.

     raw_batch_size = os.getenv('CS_WIKIPEDIA_ACTION_BATCH_SIZE', 2)
     self.batch_size = int(raw_batch_size)
     logging.info(f"Survey batch size set to : {self.batch_size}")

     self.db_session = init_session()

     # The environment variable takes precedence over the argument.
     self.lang = os.getenv('CS_WIKIPEDIA_LANG', lang)
     logging.info(f"Survey sending language set to. {self.lang}")

     consumer_key = os.environ['CS_OAUTH_CONSUMER_KEY']
     consumer_secret = os.environ['CS_OAUTH_CONSUMER_SECRET']
     self.consumer_token = mwoauth.ConsumerToken(consumer_key,
                                                 consumer_secret)

     raw_max_send_errors = os.getenv('CS_OAUTH_THANKS_MAX_SEND_ERRORS', 5)
     self.max_send_errors = int(raw_max_send_errors)

     settings = self.config['settings']
     self.intervention_type = settings['intervention_type']
     self.intervention_name = settings['intervention_name']

     # Slot for a connection or session kept open between different phases.
     self.api_con = None

     # CS_DRY_RUN is parsed as an integer and then reduced to a bool.
     dry_run_flag = os.getenv('CS_DRY_RUN', False)
     self.dry_run = bool(int(dry_run_flag))

     self.enable_create_actions = enable_create_actions
     self.enable_execute_actions = enable_execute_actions
    def sample_population(self, lang):
        """Sample eligible users for ``lang`` and add per-group covariates.

        Steps, in order:
        - fetch active users, or the configured ``custom_users`` list
        - annotate them with ``self.add_bots`` and drop rows whose
          ``is_official_bot`` is true
        - keep users with at least ``self.min_edit_count`` edits
        - drop users listed in ``self.users_in_thanker_experiment[lang]``
        - assign experience bins
        - for each configured group: collect quality data, then add labour
          hours, email status and previous thanks received when those columns
          are missing, and finally mark the users as included

        Each added column is handed to ``self.df_to_db_col``.

        Args:
            lang: Language key into ``self.config['langs']``.

        Returns:
            None.
        """
        # Get the active users
        if "custom_users" in self.config["langs"][lang].keys():
            # Not sampling active users; using the configured custom_users
            # list instead.
            active_users = get_specific_users(
                lang,
                self.config['langs'][lang]["custom_users"],
                wmf_con=self.wmf_con)
        else:
            active_users = self.get_active_users(
                lang,
                start_date=self.onboarding_earliest_active_date,
                end_date=self.onboarding_latest_active_date,
                min_rev_id=self.langs[lang]['min_rev_id'],
                wmf_con=self.wmf_con)
        active_users_bots = self.add_bots(active_users, lang)

        logging.info(
            f"length of active users before bot check {len(active_users_bots)}"
        )
        active_users_no_bots = active_users_bots[
            active_users_bots['is_official_bot'] == False]
        bots = active_users_bots[active_users_bots['is_official_bot'] == True]
        logging.info(
            f"length of active users after bot check {len(active_users_no_bots)}"
        )

        logging.info(f"active bots are {bots[['user_name','user_editcount']]}")
        # Subset to users with at least the minimum number of edits. The mask
        # must be built from the frame being filtered: a mask taken from the
        # unfiltered `active_users` frame can be misaligned with
        # `active_users_no_bots` (different rows and possibly a different
        # index).
        active_users_min_edits = active_users_no_bots[
            active_users_no_bots['user_editcount'] >= self.min_edit_count]
        # Subset to users who are not in the thanker experiment
        active_users_min_edits_nonthanker = active_users_min_edits[
            active_users_min_edits["user_id"].apply(
                lambda uid: uid not in self.users_in_thanker_experiment[lang])]
        # Add experience levels
        active_users_min_edits_nonthanker_exp = add_experience_bin(
            active_users_min_edits_nonthanker, self.experiment_start_date)

        # Report the configured threshold rather than a hard-coded number.
        logging.info(
            f"Group {lang} has {len(active_users_min_edits_nonthanker_exp)} active users with {self.min_edit_count} edits in history."
        )

        # Now work on groups
        groups = self.config['langs'][lang]['groups']
        for group_name, inclusion_criteria in groups.items():
            df = self.get_quality_data_for_group(
                super_group=active_users_min_edits_nonthanker_exp,
                lang=lang,
                group_name=group_name,
                inclusion_criteria=inclusion_criteria)

            # When an active-users replacement is injected, skip the
            # covariate steps below.
            if self.get_active_users_replacement:
                continue
            # Nota bene: this is where things get a bit wonky.
            # User state was first meant to live in a candidates table, which
            # is useful for multiprocessing the quality-edits revisions.
            # Updating columns there is painful compared with the grow-only,
            # pandas-style handling used for the other independent variables.
            # Because onboarding happens once per active window rather than
            # on a rolling basis, the stored state is not needed for
            # comparison, so from this point the data is handled pandas-style.
            # TODO: reconcile the two ways of storing state.

            # Refresh the connections here; they sometimes go stale after
            # waiting.
            self.wmf_con = make_wmf_con()
            self.db_session = init_session()

            logging.info('adding labour hours')
            if "labor_hours_84_days_pre_sample" not in df.columns:
                df = add_labour_hours(
                    df,
                    lang,
                    start_date=self.onboarding_earliest_active_date,
                    end_date=self.onboarding_latest_active_date,
                    wmf_con=self.wmf_con,
                    col_label="labor_hours_84_days_pre_sample")
                self.df_to_db_col(lang, df, 'labor_hours_84_days_pre_sample')

            logging.info('adding email df')
            if 'has_email' not in df.columns:
                df = add_has_email(df, lang, self.wmf_con)
                self.df_to_db_col(lang, df, 'has_email')

            logging.info('adding num prev_thanks_pre_sample')
            if "num_prev_thanks_pre_sample" not in df.columns:
                # Newcomers are counted from the start of the activity
                # window; every other group from THANK_FEATURE_INTRODUCITON.
                start_date = self.onboarding_earliest_active_date if group_name == 'newcomer' else THANK_FEATURE_INTRODUCITON
                df = add_thanks_receiving(
                    df,
                    lang,
                    start_date=start_date,
                    end_date=self.onboarding_latest_active_date,
                    wmf_con=self.wmf_con,
                    col_label='num_prev_thanks_pre_sample')
                self.df_to_db_col(lang, df, 'num_prev_thanks_pre_sample')

            logging.info(
                f"Group {lang}-{group_name} Saving {len(df)} as included.")
            df['user_included'] = True
            self.df_to_db_col(lang, df, 'user_included')
def db_session():
    """Return a new session obtained from ``init_session``."""
    session = init_session()
    return session
def db_session():
    """Yield a session from ``init_session`` and close it afterwards.

    The close happens in a ``finally`` clause so the session is released
    even when an exception is thrown into the generator at the ``yield``
    or the generator is closed early.

    Yields:
        The session returned by ``init_session()``.
    """
    session = init_session()
    try:
        yield session
    finally:
        session.close()