Пример #1
0
    def __init__(self, year, activation=5, overall=False):
        '''Set up a cohort of the editors whose first edit falls in `year`.

        arg year: the year the cohort started.
        arg activation: minimum number of edits per month to be included in the cohort.
        arg overall: when true, the query selects every row of the users of
            that year whose summed `add_edits` exceeds `activation`;
            otherwise rows are filtered individually on `add_edits`.
        '''
        self.year = year
        self.activation = activation
        self.overall = overall

        # Only take time stamps starting with self.year (up to '201012').
        stamps, stamp_index = utils.create_time_stamps_month(
            fromym='%s01' % self.year, toym='201012')
        self.time_stamps = stamps
        self.time_stamps_index = stamp_index

        # Cohort definition: the selected year versus everybody else.
        self.cohorts = [self.year, 'others']
        # Cohort labels, one per cohort.
        self.cohort_labels = ['%s cohort' % cohort for cohort in self.cohorts]

        if not self.overall:
            query = 'SELECT *  FROM fabian WHERE add_edits > %s AND first_edit_year=%s' % (
                self.activation, self.year)
        else:
            query = "SELECT *  FROM fabian WHERE user_id IN (SELECT user_id FROM fabian WHERE first_edit_year=%s GROUP BY user_id HAVING SUM(add_edits)>%s);" % (
                self.year, self.activation)
        self.sqlQuery = query

        Cohort.__init__(self)
Пример #2
0
    def __init__(self, period=3):
        '''Define edit-count cohorts and reset the per-editor tracking state.

        arg period: the number of months an editor is considered new.
        '''
        # Editors with 0 edits are not taken into consideration: the data is
        # coded sparse, so that information is not available.
        # Note that len(self.cohorts) has to equal the number of bins in the numpy.array.
        # Finer-grained alternative that was tried:
        #   [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']
        bounds = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']
        self.cohorts = bounds

        # Labels: '1 edit', then '<previous bound + 1>-<bound> edits' for every
        # numeric bound, and finally the open-ended last entry unchanged.
        labels = ['%s edit' % (bounds[0])]
        labels.extend('%s-%s edits' % (lower + 1, upper)
                      for lower, upper in zip(bounds, bounds[1:-1]))
        labels.append(bounds[-1])
        self.cohort_labels = labels

        self.period = period

        # The user_id of the previously encountered editor as we iterate through the table.
        self.old_user_id = None
        # The ym at the end of the `period` months after the first edit of an editor.
        self.lastym = None
        self.firstedit = None
        self.fe_index = None

        # Running counters, all starting at zero.
        self.edits = self.added = self.removed = self.net = 0

        Cohort.__init__(self)
Пример #3
0
    def __init__(self, minedits=1, maxedits=None):
        '''Define one cohort per entry of `settings.time_stamps` (editor age in months).

        arg minedits: minimum number of edits by editor in a given month to be included.
        arg maxedits: maximum number of edits by editor in a given month to be included.
        '''
        self.minedits = minedits
        self.maxedits = maxedits

        # Cohort definition: the indices 0 .. len(settings.time_stamps) - 1.
        month_count = len(settings.time_stamps)
        self.cohorts = list(range(month_count))
        # Cohort labels.
        self.cohort_labels = ['%s month old' % age for age in self.cohorts]

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

        # Number of visible colors in the wikipride plots, e.g. one color for
        # every six months.
        # NOTE(review): `/` is true division on Python 3, so this may be a
        # float there - confirm what the plotting code expects.
        covered_months = utils.numberOfMonths(settings.time_stamps[0],
                                              settings.time_stamps[-1])
        self.ncolors = covered_months / 6

        Cohort.__init__(self)
Пример #4
0
    def __init__(self):
        '''Check that the cohort definition and labels exist, then run `Cohort.__init__`.

        A missing definition is only logged; initialisation continues regardless.
        '''
        definition_missing = self.cohorts is None
        if definition_missing or self.cohort_labels is None:
            # Raising here was considered but is deliberately left disabled.
            logger.error(
                "self.cohorts or self.cohort_labels not properly defined")

        Cohort.__init__(self)
Пример #5
0
    def __init__(self, reverttype, activation=1):
        '''Store the revert settings and run the `Cohort` initializer.

        arg reverttype: str, type of revert: 'reverting','reverted','revertedto'
        arg activation: minimum number of reverts by editor to be included
        '''
        # Minimum number of reverts by editor to be included.
        self.activation = activation
        # One of the revert types listed above.
        self.reverttype = reverttype

        Cohort.__init__(self)
Пример #6
0
    def __init__(self):
        '''Define the edit-count cohorts and their human-readable labels.'''
        # Editors with 0 edits are not taken into consideration: the data is
        # coded sparse, so that information is not available.
        # Note that len(self.cohorts) has to equal the number of bins in the numpy.array.
        # Finer-grained alternative that was tried:
        #   [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']
        upper_bounds = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']
        self.cohorts = upper_bounds

        # First label is '1 edit', the middle ones are ranges such as
        # '2-5 edits', and the open-ended last entry is kept as it is.
        range_labels = ['%s-%s edits' % (previous + 1, current)
                        for previous, current in zip(upper_bounds, upper_bounds[1:-1])]
        self.cohort_labels = ['%s edit' % (upper_bounds[0])] + range_labels + [upper_bounds[-1]]

        Cohort.__init__(self)
Пример #7
0
    def __init__(self, activation=1, reverttype=None):
        '''Set up cohorts by editor type and load the helper user-id sets.

        arg activation: stored as `self.activation`; not otherwise used by
            this initializer.
        arg reverttype: str or None, inserted into the name of the table that
            `self.sqlQuery` selects from. When it is None no query can be
            built: an error is logged and `self.sqlQuery` is left unset.

        Any failure while talking to the database is logged (with traceback)
        and initialisation continues with `Cohort.__init__`.
        '''
        # Previously considered, finer-grained list:
        #   ['Administrator','Bureaucrat','Eliminator','Huggler','Bot','Multiple','Other']
        editor_types = ['Administrator', 'Huggler', 'Bot', 'Admin & Huggle', 'Other']

        # The parameter used to be accepted but silently dropped.
        self.activation = activation

        # Dictionary mapping editor type to its index in the numpy.array matrix.
        self.editor_types_dict = {name: index for index, name in enumerate(editor_types)}
        # Cohort definition: one index per editor type.
        self.cohorts = list(range(len(editor_types)))
        # Cohort labels.
        self.cohort_labels = editor_types

        # Initialize helper structures.
        try:
            from db import sql

            cur = sql.getSSCursor()

            def fetch_user_ids(query):
                # Run `query` and collect the first column of every row.
                cur.execute(query)
                return set(row[0] for row in cur)

            self.administrators = fetch_user_ids("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug on (u.user_id=ug.ug_user) where ug.ug_group='sysop';")
            self.bureaucrats = fetch_user_ids("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug on (u.user_id=ug.ug_user) where ug.ug_group='bureaucrat';")
            self.eliminators = fetch_user_ids("select user_id from ptwiki_p.user u join ptwiki_p.user_groups ug on (u.user_id=ug.ug_user) where ug.ug_group='eliminator';")
            self.hugglers = fetch_user_ids("select user_id from u_declerambaul.ptwiki_huggle;")
            self.bots = fetch_user_ids("select user_id from u_declerambaul.ptwiki_bots;")

            # `reverttype` used to be an undefined name at this point; the
            # resulting NameError was swallowed by a bare `except` and reported
            # as a connection problem, so the query was never set.
            if reverttype is None:
                logging.error("No reverttype given to RevertsByEditorType; self.sqlQuery is not set.")
            else:
                self.sqlQuery = 'SELECT * FROM u_declerambaul.ptwiki_%s_editor_year_month;' % reverttype

        except Exception:
            # Best effort, as before, but keep the traceback so the real cause
            # is visible, and let KeyboardInterrupt/SystemExit propagate.
            logging.exception("Could not establish SQL connection to initialize RevertsByEditorType.")

        Cohort.__init__(self)
Пример #8
0
    def __init__(self, activitylevels=(5, 10, 100)):
        '''Define one cohort per activity level.

        arg activitylevels: iterable of edit-count thresholds; each one becomes
            a cohort labelled '<level>+ edits'.
        '''
        # Cohort definition. Copied into a fresh list so that instances never
        # share the default value or alias the caller's sequence (the default
        # used to be a mutable list stored directly on the instance).
        self.cohorts = list(activitylevels)
        # Cohort labels.
        self.cohort_labels = ['%s+ edits' % level for level in self.cohorts]

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH_NS0_NOREDIRECT

        # The number of visible colors in the wikipride plots is not set here:
        # self.ncolors = utils.numberOfMonths(settings.time_stamps[0],settings.time_stamps[-1])/6

        Cohort.__init__(self)
Пример #9
0
    def __init__(self):
        '''Define one cohort per namespace '0'..'5' plus a catch-all 'other'.'''
        namespaces = ('0', '1', '2', '3', '4', '5', 'other')

        # Cohort definition.
        self.cohorts = namespaces
        # Cohort labels.
        self.cohort_labels = ['%s namespace' % ns for ns in namespaces]

        # Position of every numbered namespace; 'other' has no entry.
        self.cohort_index = dict(
            (ns, position) for position, ns in enumerate(namespaces[:-1]))

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH_NAMESPACE

        Cohort.__init__(self)
Пример #10
0
    def __init__(self, activitylevels=(5, 10, 100)):
        '''Define one cohort per activity level.

        arg activitylevels: iterable of edit-count thresholds; each one becomes
            a cohort labelled '<level>+ edits'.
        '''
        # Cohort definition. A fresh list is built so the instance neither
        # shares the default value with other instances nor aliases the
        # sequence passed in by the caller (the default used to be a mutable
        # list stored directly on the instance).
        self.cohorts = list(activitylevels)
        # Cohort labels.
        self.cohort_labels = ['%s+ edits' % level for level in self.cohorts]

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH_NS0_NOREDIRECT

        # The number of visible colors in the wikipride plots is not set here:
        # self.ncolors = utils.numberOfMonths(settings.time_stamps[0],settings.time_stamps[-1])/6

        Cohort.__init__(self)
Пример #11
0
    def __init__(self):
        '''Define the edit-count cohorts and their human-readable labels.'''
        # Editors with 0 edits are not taken into consideration: the data is
        # coded sparse, so that information is not available.
        # Note that len(self.cohorts) has to equal the number of bins in the numpy.array.
        # Finer-grained alternative that was tried:
        #   [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']
        bins = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']
        self.cohorts = bins

        # '1 edit' first, then ranges such as '2-5 edits'; the open-ended last
        # entry is kept verbatim.
        labels = ['%s edit' % (bins[0])]
        for below, bound in zip(bins, bins[1:-1]):
            labels.append('%s-%s edits' % (below + 1, bound))
        labels.append(bins[-1])
        self.cohort_labels = labels

        Cohort.__init__(self)
Пример #12
0
    def __init__(self):
        '''Define the edit-count cohorts, their labels and the source query.'''
        # Editors with 0 edits are not taken into consideration: the data is
        # coded sparse, so that information is not available.
        bounds = [1, 5, 50, 100, 500, 1000, '>1000 edits']
        self.cohorts = bounds

        # Labels: '1 edit', then '<previous bound + 1>-<bound> edits' for each
        # numeric bound, and the open-ended last entry unchanged.
        labels = ['%s edit' % (bounds[0])]
        labels.extend('%s-%s edits' % (lower + 1, upper)
                      for lower, upper in zip(bounds, bounds[1:-1]))
        labels.append(bounds[-1])
        self.cohort_labels = labels

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

        Cohort.__init__(self)
Пример #13
0
    def __init__(self):
        '''Define the edit-count cohorts, their labels and the source query.'''
        # Editors with 0 edits are not taken into consideration: the data is
        # coded sparse, so that information is not available.
        upper_bounds = [1, 5, 50, 100, 500, 1000, '>1000 edits']
        self.cohorts = upper_bounds

        # First label is '1 edit', the middle ones are ranges such as
        # '2-5 edits', and the open-ended last entry is kept as it is.
        range_labels = ['%s-%s edits' % (previous + 1, current)
                        for previous, current in zip(upper_bounds, upper_bounds[1:-1])]
        self.cohort_labels = ['%s edit' % (upper_bounds[0])] + range_labels + [upper_bounds[-1]]

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

        Cohort.__init__(self)
Пример #14
0
    def __init__(self):
        '''Define the single 'New Editors' cohort and the query counting recruits per ym.'''
        # Cohort definition; the labels are the very same list object.
        new_editor_cohorts = ['New Editors']
        self.cohorts = new_editor_cohorts
        self.cohort_labels = new_editor_cohorts

        # The SQL query returns the new editor count for each ym.
        # The text (whitespace included) is kept exactly as it always was.
        recruits_query = (
            "SELECT  \n"
            "            first_edit_year, \n"
            "            first_edit_month, \n"
            "            count(*) AS recruits\n"
            "        FROM\n"
            "            %s\n"
            "        GROUP BY\n"
            "            first_edit_year,\n"
            "            first_edit_month;"
        )
        self.sqlQuery = recruits_query % tables.USER_COHORT

        Cohort.__init__(self)
Пример #15
0
    def __init__(self, minedits=1, maxedits=None):
        '''Define one cohort per entry of `settings.time_stamps` (editor age in months).

        arg minedits: minimum number of edits by editor in a given month to be included.
        arg maxedits: maximum number of edits by editor in a given month to be included.
        '''
        # Cohort definition: the indices 0 .. len(settings.time_stamps) - 1.
        ages = list(range(len(settings.time_stamps)))
        self.cohorts = ages
        # Cohort labels.
        self.cohort_labels = ['%s month old' % age for age in ages]

        # The SQL query returns edit information for each editor for each ym she has edited.
        self.sqlQuery = 'SELECT * FROM %s;' % tables.EDITOR_YEAR_MONTH

        self.minedits = minedits
        self.maxedits = maxedits

        # Number of visible colors in the wikipride plots, e.g. one color for
        # every six months.
        # NOTE(review): `/` is true division on Python 3, so this may be a
        # float there - confirm what the plotting code expects.
        first_ym = settings.time_stamps[0]
        last_ym = settings.time_stamps[-1]
        self.ncolors = utils.numberOfMonths(first_ym, last_ym) / 6

        Cohort.__init__(self)
Пример #16
0
    def __init__(self, activation=5):
        '''Define yearly cohorts 2004..2010 for the namespaces '4' and '5'.

        arg activation: stored as `self.activation`; the only place it appears
            besides that is the disabled query noted below.
        '''
        self.activation = activation
        self.NS = ('4', '5')

        # Monthly time stamps from '200401' to '201012'.
        stamps, stamp_index = utils.create_time_stamps_month(fromym='200401',
                                                             toym='201012')
        self.time_stamps = stamps
        self.time_stamps_index = stamp_index

        # Cohort definition: one cohort per year, 2004 up to and including 2010.
        first_year, end_year = 2004, 2011
        self.cohorts = range(first_year, end_year)
        # Cohort labels.
        self.cohort_labels = ['%s cohort' % year for year in self.cohorts]

        # No query is set here; the one that was drafted is kept for reference:
        # self.sqlQuery = 'SELECT * FROM fabian WHERE namespace IN (4,5) AND add_edits > %s AND first_edit_year in (%s)'%(self.activation,','.join([str(c) for c in self.cohorts]))

        Cohort.__init__(self)
Пример #17
0
    def __init__(self, period=3):
        '''Define edit-count cohorts and reset the per-editor tracking state.

        arg period: the number of months an editor is considered new.
        '''
        # Editors with 0 edits are not taken into consideration: the data is
        # coded sparse, so that information is not available.
        # Note that len(self.cohorts) has to equal the number of bins in the numpy.array.
        # Finer-grained alternative that was tried:
        #   [1,3,5,7,10,20,40,60,80,100,200,500,1000,5000,10000,'>10000 edits']
        bins = [1, 5, 10, 50, 100, 500, 1000, '>1000 edits']
        self.cohorts = bins

        # '1 edit' first, then ranges such as '2-5 edits'; the open-ended last
        # entry is kept verbatim.
        labels = ['%s edit' % (bins[0])]
        for below, bound in zip(bins, bins[1:-1]):
            labels.append('%s-%s edits' % (below + 1, bound))
        labels.append(bins[-1])
        self.cohort_labels = labels

        self.period = period

        # The user_id of the previously encountered editor as we iterate through the table.
        self.old_user_id = None
        # The ym at the end of the `period` months after the first edit of an editor.
        self.lastym = None
        self.firstedit = None
        self.fe_index = None

        # Running counters, all starting at zero.
        self.edits = self.added = self.removed = self.net = 0

        Cohort.__init__(self)
Пример #18
0
    def __init__(self):
        '''Log an error if the cohort definition or labels are None, then run `Cohort.__init__`.

        The problem is only reported; initialisation carries on either way.
        '''
        properly_defined = self.cohorts is not None and self.cohort_labels is not None
        if not properly_defined:
            # Raising an exception instead was considered but is left disabled.
            logger.error("self.cohorts or self.cohort_labels not properly defined")

        Cohort.__init__(self)