Пример #1
0
 def __init__(self, name, **kwargs):
     '''
     Store the name, set the default attributes and then copy every extra
     keyword argument onto the instance as an attribute.
     '''
     self.name = name
     self.projects = []
     self.time = data_converter.create_datacontainer(datatype='list')
     self.verified = True
     # Keyword arguments are applied last, so they can replace any of the
     # attributes assigned above.
     for attribute, setting in kwargs.items():
         setattr(self, attribute, setting)
Пример #2
0
def determine_article_count(articles_edited, first_year, final_year):
    '''
    This function turns the per year / month / namespace collections of
    articles edited by a particular editor into their sizes, i.e. the number
    of articles per namespace per month per year.
    '''
    counts = data_converter.create_datacontainer(first_year, final_year)
    counts = data_converter.add_months_to_datacontainer(counts, 'dict')
    for year in articles_edited:
        months = articles_edited[year]
        for month in months:
            namespaces = months[month]
            for ns in namespaces:
                counts[year][month][ns] = len(namespaces[ns])
    return cleanup_datacontainer(counts, {})
Пример #3
0
def determine_last_edit_by_year(edits, first_year, final_year):
    '''
    This function finds, for each year, the most recent edit date of a given
    editor. The container is created with 0 as its third argument and the
    loop treats a stored 0 as "no edit recorded for this year yet".
    '''
    latest = data_converter.create_datacontainer(first_year, final_year, 0)
    for edit in edits:
        stamp = edit['date']
        year = str(stamp.year)
        current = latest[year]
        # Store the date when the year is still untouched or when this edit
        # is more recent than the one remembered so far.
        if current == 0 or current < stamp:
            latest[year] = stamp
    return latest
Пример #4
0
def determine_number_edits(edits, first_year, final_year):
    '''
    This function tallies the edits per namespace per month per year.
    '''
    tally = data_converter.create_datacontainer(first_year, final_year)
    tally = data_converter.add_months_to_datacontainer(tally, 'dict')
    for edit in edits:
        ns = str(edit['ns'])
        stamp = edit['date']
        year = str(stamp.year)
        month = str(stamp.month)
        bucket = tally[year][month]
        # A namespace not seen before in this month starts counting at 0.
        bucket[ns] = bucket.get(ns, 0) + 1
    return cleanup_datacontainer(tally, {})
Пример #5
0
def determine_number_reverts(edits, first_year, final_year):
    '''
    This function tallies, per namespace per month per year, the edits whose
    'revert' field is truthy.
    '''
    reverts = data_converter.create_datacontainer(first_year, final_year)
    reverts = data_converter.add_months_to_datacontainer(reverts, 'dict')
    for edit in edits:
        stamp = edit['date']
        year = str(stamp.year)
        month = str(stamp.month)
        ns = str(edit['ns'])
        if not edit['revert']:
            continue
        bucket = reverts[year][month]
        bucket[ns] = bucket.get(ns, 0) + 1
    return cleanup_datacontainer(reverts, {})
Пример #6
0
def determine_namespaces_workedon(edits, first_year, final_year):
    '''
    This function collects, for every month/year, the distinct namespaces an
    editor has worked on and returns them as lists.
    '''
    seen = data_converter.create_datacontainer(first_year, final_year)
    seen = data_converter.add_months_to_datacontainer(seen, 'set')
    for edit in edits:
        stamp = edit['date']
        seen[str(stamp.year)][str(stamp.month)].add(edit['ns'])

    # Replace every monthly set by a list before the container is cleaned up.
    for year in seen:
        months = seen[year]
        for month in months:
            months[month] = list(months[month])
    return cleanup_datacontainer(seen, [])
Пример #7
0
    def add(self, key, value):
        '''
        Buffer one edit for the editor identified by key, or flush that editor.

        When value is the string 'NEXT' the buffered edits of the editor are
        passed through drop_years_no_obs, handed to insert together with the
        username, and the buffer entry is removed. Any other value is treated
        as an edit: its 'username' entry is popped and the edit is appended to
        the list of the year of its 'date'.
        '''
        if value == 'NEXT':
            self.n += 1
            buffered = self.editors[key]
            edits = self.drop_years_no_obs(buffered['edits'])
            self.insert(key, edits, buffered['username'])
            del self.editors[key]
            return

        if key in self.editors:
            # The username is already known; only strip it from the edit.
            value.pop('username')
        else:
            record = self.editors[key] = {}
            record['obs'] = 0
            record['edits'] = data_converter.create_datacontainer(
                2001, self.final_year, 'list')
            record['username'] = value.pop('username')

        year = str(value['date'].year)
        record = self.editors[key]
        record['edits'][year].append(value)
        record['obs'] += 1
Пример #8
0
def determine_edit_volume(edits, first_year, final_year):
    '''
    This function sums the characters added and removed per namespace per
    month per year for a particular editor. Negative deltas are summed as-is
    under 'removed' (so that total is negative), positive deltas under
    'added'; a delta of zero only creates the namespace entry.
    '''
    volume = data_converter.create_datacontainer(first_year, final_year)
    volume = data_converter.add_months_to_datacontainer(volume, 'dict')
    for edit in edits:
        stamp = edit['date']
        year = str(stamp.year)
        month = str(stamp.month)
        ns = str(edit['ns'])
        per_ns = volume[year][month].setdefault(ns, {})
        delta = edit['delta']
        if delta < 0:
            per_ns['removed'] = per_ns.get('removed', 0) + delta
        elif delta > 0:
            per_ns['added'] = per_ns.get('added', 0) + delta
    return cleanup_datacontainer(volume, {})
Пример #9
0
def determine_articles_workedon(edits, first_year, final_year):
    '''
    This function collects, per namespace per month per year, the distinct
    article_ids that an editor has worked on and returns them as lists.
    '''
    worked_on = data_converter.create_datacontainer(first_year, final_year)
    worked_on = data_converter.add_months_to_datacontainer(worked_on, 'dict')
    for edit in edits:
        stamp = edit['date']
        year = str(stamp.year)
        month = str(stamp.month)
        ns = str(edit['ns'])
        worked_on[year][month].setdefault(ns, set()).add(edit['article_id'])

    # mongo cannot store sets, so every set is turned into a list.
    for year in worked_on:
        months = worked_on[year]
        for month in months:
            namespaces = months[month]
            for ns in namespaces:
                namespaces[ns] = list(namespaces[ns])
    return cleanup_datacontainer(worked_on, {})
Пример #10
0
    def __call__(self):
        '''
        Compute the per-editor summary from self.edits and insert it into
        self.db_dataset. Returns without doing anything when self.edits is an
        empty list.
        '''
        if self.edits == []:
            return

        first_year, final_year = determine_year_range(self.edits)
        span = (first_year, final_year)

        # Per-year / per-month breakdowns of the raw edits.
        last_edit_by_year = determine_last_edit_by_year(self.edits, *span)
        articles_edited = determine_articles_workedon(self.edits, *span)
        article_count = determine_article_count(articles_edited, *span)
        namespaces_edited = determine_namespaces_workedon(self.edits, *span)
        character_count = determine_edit_volume(self.edits, *span)
        revert_count = determine_number_reverts(self.edits, *span)
        edit_count = determine_number_edits(self.edits, *span)

        # Fold the four breakdowns into the totals, in a fixed order.
        counts = data_converter.create_datacontainer(first_year, final_year)
        totals = {}
        breakdowns = (
            ('character_count', character_count),
            ('revert_count', revert_count),
            ('article_count', article_count),
            ('edit_count', edit_count),
        )
        for label, series in breakdowns:
            totals = calculate_totals(totals, counts, series, label)

        cum_edit_count_main_ns, cum_edit_count_other_ns = calculate_cum_edits(
            self.edits)

        ordered = sort_edits(self.edits)
        # The date of the edit at index self.cutoff, or False when there are
        # not enough edits to reach that index.
        new_wikipedian = (ordered[self.cutoff]['date']
                          if len(ordered) > self.cutoff else False)

        first_edit = ordered[0]['date']
        final_edit = ordered[-1]['date']

        record = {
            'user_id': self.editor_id,
            'username': self.username,
            'new_wikipedian': new_wikipedian,
            'cum_edit_count_main_ns': cum_edit_count_main_ns,
            'cum_edit_count_other_ns': cum_edit_count_other_ns,
            'final_edit': final_edit,
            'first_edit': first_edit,
            'last_edit_by_year': last_edit_by_year,
            'articles_edited': articles_edited,
            'edit_count': edit_count,
            'namespaces_edited': namespaces_edited,
            'article_count': article_count,
            'character_count': character_count,
            'revert_count': revert_count,
            'totals': totals,
        }
        self.db_dataset.insert(record)