def run(self):
    """Rebuild the active/inactive user tables on the target wiki page.

    Synchronizes the local database with the API, re-sorts all rows from
    both tables by edit activity, re-partitions them by the edit-count
    threshold and saves the page.
    """
    require_login(self.api)

    # synchronize the database
    self.db.sync_with_api(self.api)

    try:
        page = AutoPage(self.api, self.pagename)
    except ValueError:
        logger.error(
            "The page [[{}]] currently does not exist. It must be "
            "created manually before the script can update it.".format(self.pagename))
        return

    # the page is expected to contain exactly two tables:
    # the first for active users, the second for inactive ones
    tables = page.wikicode.filter_tags(
        matches=lambda node: node.tag == "table",
        recursive=page.wikicode.RECURSE_OTHERS)
    assert len(tables) == 2
    table_active, table_inactive = tables

    # collect the rows from both tables
    all_rows = self.extract_rows(table_active)
    all_rows.extend(self.extract_rows(table_inactive))

    # order by (edit count, timestamp of last edit), highest first
    all_rows.sort(
        key=lambda row: (self._get_editcount(row),
                         self._get_last_edit_timestamp(row)),
        reverse=True)

    # partition into active/inactive by the minimum-edits threshold
    active_rows = []
    inactive_rows = []
    for row in all_rows:
        if self._get_editcount(row) >= self.min_edits:
            active_rows.append(row)
        else:
            inactive_rows.append(row)

    # write the sorted rows back into the corresponding tables
    for row in active_rows:
        table_active.contents.append(row)
    for row in inactive_rows:
        table_inactive.contents.append(row)

    # save
    page.save(self.edit_summary, minor="1")
def run(self):
    """Fetch the statistics page, recompose it if it is stale, and output it.

    Returns 1 when the page is missing or already fresh; otherwise returns
    whatever ``_output_page`` reports.
    """
    # anonymous mode skips the login requirement
    if not self.cliargs.anonymous:
        require_login(self.api)

    try:
        self.page = AutoPage(self.api, self.cliargs.statistics_page)
    except ValueError:
        logger.error(
            "The page [[{}]] currently does not exist. It must be "
            "created manually before the script can update it.".format(
                self.cliargs.statistics_page))
        return 1

    # guard clause: skip if the page was already updated this UTC day
    # (unless --force is given)
    if not (self.cliargs.force or self.page.is_old_enough(
            min_interval=datetime.timedelta(days=1), strip_time=True)):
        logger.info("The page has already been updated this UTC day")
        return 1

    self._compose_page()
    return self._output_page()
def run(self):
    """Run the package check and deliver the report.

    The report is always saved to a dated JSON file; when a report page is
    configured it is also written to the wiki (falling back to a local file
    on API failure).
    """
    today = datetime.datetime.now().strftime("%Y-%m-%d")
    basename = os.path.join(self.report_dir,
                            "update-pkgs-{}.report".format(today))

    # when a report page is configured, locate its report <div> up front and
    # honour the 7-day update interval before doing any expensive work
    if self.report_page:
        page = AutoPage(self.api, self.report_page)
        div = page.get_tag_by_id("div", "wiki-scripts-archpkgs-report")
        if not page.is_old_enough(datetime.timedelta(days=7), strip_time=True):
            logger.info("The report page on the wiki has already been updated in the past 7 days, skipping today's update.")
            return

    try:
        self.check_allpages()
    except (KeyboardInterrupt, InteractiveQuit):
        # newline after ^C, save a partial report, then re-raise
        print()
        partial_report = self.get_report_wikitext()
        self.save_report_to_json(partial_report, basename)
        raise

    mwreport = self.get_report_wikitext()
    self.save_report_to_json(mwreport, basename)

    if self.report_page:
        div.contents = mwreport
        try:
            page.save("automatic update", self.interactive)
            logger.info(
                "Saved report to the [[{}]] page on the wiki.".format(
                    self.report_page))
        except APIError:
            # fall back to a local file when saving to the wiki fails
            self.save_report_to_file(mwreport, basename)
def run(self):
    """Re-sort the active/inactive user tables on the wiki page by edit activity."""
    require_login(self.api)

    try:
        page = AutoPage(self.api, self.pagename)
    except ValueError:
        logger.error("The page [[{}]] currently does not exist. It must be "
                     "created manually before the script can update it."
                     .format(self.pagename))
        return

    # exactly two tables are expected on the page
    def _is_table(node):
        return node.tag == "table"

    tables = page.wikicode.filter_tags(matches=_is_table,
                                       recursive=page.wikicode.RECURSE_OTHERS)
    assert len(tables) == 2
    table_active, table_inactive = tables

    # gather the rows from both tables
    rows = self.extract_rows(table_active)
    rows.extend(self.extract_rows(table_inactive))

    # sort by edit count, then by last-edit timestamp, descending
    rows.sort(key=lambda r: (self._get_editcount(r),
                             self._get_last_edit_timestamp(r)),
              reverse=True)

    # split by the minimum-edits threshold
    rows_active = [r for r in rows
                   if self._get_editcount(r) >= self.min_edits]
    rows_inactive = [r for r in rows
                     if self._get_editcount(r) < self.min_edits]

    # re-append the sorted rows into their tables
    for r in rows_active:
        table_active.contents.append(r)
    for r in rows_inactive:
        table_inactive.contents.append(r)

    # save
    page.save(self.edit_summary, minor="1")
def run(self):
    """Update the statistics page unless it was already refreshed this UTC day."""
    if not self.cliargs.anonymous:
        require_login(self.api)

    try:
        self.page = AutoPage(self.api, self.cliargs.statistics_page)
    except ValueError:
        logger.error("The page [[{}]] currently does not exist. It must be "
                     "created manually before the script can update it."
                     .format(self.cliargs.statistics_page))
        return 1

    # --force bypasses the one-update-per-UTC-day check
    should_update = self.cliargs.force or self.page.is_old_enough(
        min_interval=datetime.timedelta(days=1), strip_time=True)
    if should_update:
        self._compose_page()
        return self._output_page()

    logger.info("The page has already been updated this UTC day")
    return 1
class Statistics:
    """
    The whole statistics page.
    """
    def __init__(self, api, db, cliargs):
        # api: wiki API connection; db: local database; cliargs: parsed CLI options
        self.api = api
        self.db = db
        self.cliargs = cliargs

    @staticmethod
    def set_argparser(argparser):
        """Register this script's command-line options on *argparser*."""
        # first try to set options for objects we depend on
        present_groups = [group.title for group in argparser._action_groups]
        if "Connection parameters" not in present_groups:
            API.set_argparser(argparser)
        if "Database parameters" not in present_groups:
            Database.set_argparser(argparser)

        output = argparser.add_argument_group(title="output")
        # TODO: maybe leave only the short option to forbid configurability in config file
        output.add_argument('-s', '--save', action='store_true',
                            help='try to save the page (requires being logged in)')
        # FIXME: -c conflicts with -c/--config
        # output.add_argument('-c', '--clipboard', action='store_true',
        output.add_argument('--clipboard', action='store_true',
                            help='try to store the updated text in the clipboard')
        output.add_argument('-p', '--print', action='store_true',
                            help='print the updated text in the standard output '
                                 '(this is the default output method)')

        usstats = argparser.add_argument_group(title="user statistics")
        usstats.add_argument('--us-days-span', action='store', default=30,
                             type=int, dest='us_days', metavar='N',
                             help='the time span in days (default: %(default)s)')
        usstats.add_argument('--us-min-tot-edits', action='store', default=1000,
                             type=int, dest='us_mintotedits', metavar='N',
                             help='minimum total edits for users with not enough '
                                  'recent changes (default: %(default)s)')
        usstats.add_argument('--us-min-rec-edits', action='store', default=1,
                             type=int, dest='us_minrecedits', metavar='N',
                             help='minimum recent changes for users with not enough '
                                  'total edits (default: %(default)s)')

        # TODO: main group for "script parameters" would be most logical, but
        # but argparse does not display nested groups in the help page
        group = argparser.add_argument_group(title="other parameters")
        group.add_argument('-a', '--anonymous', action='store_true',
                           help='do not require logging in: queries may be limited to '
                                'a lower rate')
        # TODO: maybe leave only the short option to forbid configurability in config file
        group.add_argument('-f', '--force', action='store_true',
                           help='try to update the page even if it was last saved in '
                                'the same UTC day')
        group.add_argument('--statistics-page', default='ArchWiki:Statistics',
                           help='the page name on the wiki to fetch and update '
                                '(default: %(default)s)')
        # TODO: no idea how to forbid setting this globally in the config...
        group.add_argument('--summary', default='automatic update',
                           help='the edit summary to use when saving the page '
                                '(default: %(default)s)')

    @classmethod
    def from_argparser(klass, args, api=None, db=None):
        """Construct an instance from parsed command-line arguments.

        Existing *api*/*db* objects may be passed in to avoid rebuilding them.
        """
        if api is None:
            api = API.from_argparser(args)
        if db is None:
            db = Database.from_argparser(args)
        return klass(api, db, args)

    def run(self):
        """Fetch, recompose and output the statistics page.

        Returns 1 when the page is missing or was already updated this UTC
        day; otherwise returns the result of :meth:`_output_page`.
        """
        if not self.cliargs.anonymous:
            require_login(self.api)

        # synchronize the database
        self.db.sync_with_api(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                         "created manually before the script can update it."
                         .format(self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(
                min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1

    def _compose_page(self):
        # regenerate the user-statistics section of self.page in place
        userstats = _UserStats(self.api, self.db, self.page,
                               self.cliargs.us_days,
                               self.cliargs.us_mintotedits,
                               self.cliargs.us_minrecedits)
        userstats.update()

    def _output_page(self):
        """Deliver the updated page per the selected output action(s).

        Internal status bits: bit 1 (value 2) records that at least one
        output method succeeded, bit 0 (value 1) records a failure.
        Returns ``ret & 1``, i.e. 0 on full success and 1 otherwise.
        """
        ret = 0
        if self.cliargs.save:
            require_login(self.api)
            try:
                self.page.save(self.cliargs.summary, minor="1")
                logger.info("The page has been saved: do not forget to "
                            "double-check the diff")
                ret |= 2
            except APIError as err:
                # FIX: the APIError used to be swallowed silently; log it so
                # the user knows why the save failed (best-effort behavior kept)
                logger.error("Failed to save the page: {}".format(err))
                ret |= 1
        # NOTE(review): the original condition was
        # ``self.cliargs.clipboard or ret is False`` — ``ret`` is always an
        # int here, so ``ret is False`` could never be true; the dead operand
        # has been dropped without changing behavior.
        if self.cliargs.clipboard:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.page.wikicode)
                # The copied text is lost once the script terminates
                input("The updated page text has been copied to the clipboard: "
                      "paste it in the browser, then press Enter to continue")
                w.destroy()
                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1
        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.page.wikicode)
        return ret & 1
def run(self):
    """Regenerate the ToC table on each target page.

    Depending on the CLI flags the result is printed to stdout (--print) or
    written back to the wiki (--save). Pages without the ToC entry point are
    skipped (save mode) or rendered without translations (print mode).
    """
    if not self.cliargs.anonymous:
        require_login(self.api)

    # if we are going to save, make sure that the categories are correct first
    if self.cliargs.save is True:
        cat = Categorization(self.api)
        cat.fix_allpages()
        decat = Decategorization(self.api)
        decat.fix_allpages()

    # build category graph
    graph = CategoryGraph(self.api)

    # if we are going to save, init wanted categories
    if self.cliargs.save is True:
        graph.init_wanted_categories()

    # detect target pages, fetch content at once
    page = AutoPage(self.api, fetch_titles=self.titles)

    for title in self.titles:
        try:
            page.set_title(title)
        except ValueError:
            # page not fetched
            continue

        toc_table = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
        # NOTE: parse_toc_table is expected to tolerate toc_table being None
        columns, category_names, alsoin = self.parse_toc_table(title, toc_table)

        if toc_table is None:
            if self.cliargs.save is True:
                logger.error(
                    "The wiki page [[{}]] does not contain the ToC table. "
                    "Create the following entry point manually:\n"
                    "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                continue
            else:
                logger.warning(
                    "The wiki page [[{}]] does not contain the ToC table, "
                    "so there will be no translations.".format(title))

        if self.cliargs.print:
            ff = PlainFormatter(graph.parents, graph.info, category_names, alsoin)
        elif self.cliargs.save:
            ff = MediaWikiFormatter(graph.parents, graph.info, category_names,
                                    alsoin, include_opening_closing_tokens=False)
        else:
            raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

        roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
        ff.format_root(roots)
        if len(roots) == 1:
            for item in graph.walk(graph.subcats, roots[0]):
                ff.format_row(item)
        elif len(roots) == 2:
            for result in graph.compare_components(graph.subcats, *roots):
                ff.format_row(*result)
        else:
            logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
            continue

        if self.cliargs.print:
            print("== {} ==\n".format(title))
            print(ff)
        elif self.cliargs.save:
            toc_table.contents = str(ff)
            if self.cliargs.force or page.is_old_enough(
                    min_interval=datetime.timedelta(days=1), strip_time=True):
                try:
                    page.save(self.cliargs.summary)
                except APIError as err:
                    # FIX: the failure used to be silently swallowed
                    # (``except APIError: pass``); keep the best-effort
                    # behavior but log why the save failed
                    logger.error("Failed to save the page [[{}]]: {}".format(title, err))
            else:
                logger.info("The page [[{}]] has already been updated this UTC day.".format(title))
def run(self):
    """Rebuild the ToC table on every target page, printing or saving per CLI flags."""
    if not self.cliargs.anonymous:
        require_login(self.api)

    # if we are going to save, make sure that the categories are correct first
    if self.cliargs.save is True:
        Categorization(self.api).fix_allpages()
        Decategorization(self.api).fix_allpages()

    # build category graph
    graph = CategoryGraph(self.api)

    # if we are going to save, init wanted categories
    if self.cliargs.save is True:
        graph.init_wanted_categories()

    # detect target pages, fetch content at once
    page = AutoPage(self.api, fetch_titles=self.titles)

    for title in self.titles:
        try:
            page.set_title(title)
        except ValueError:
            # page not fetched
            continue

        toc = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
        columns, category_names, alsoin = self.parse_toc_table(title, toc)

        if toc is None:
            if self.cliargs.save is True:
                logger.error("The wiki page [[{}]] does not contain the ToC table. "
                             "Create the following entry point manually:\n"
                             "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                continue
            logger.warning("The wiki page [[{}]] does not contain the ToC table, "
                           "so there will be no translations.".format(title))

        # pick the output formatter according to the requested action
        if self.cliargs.print:
            formatter = PlainFormatter(graph.parents, graph.info,
                                       category_names, alsoin)
        elif self.cliargs.save:
            formatter = MediaWikiFormatter(graph.parents, graph.info,
                                           category_names, alsoin,
                                           include_opening_closing_tokens=False)
        else:
            raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

        roots = ["Category:{}".format(lang.langname_for_tag(col))
                 for col in columns]
        formatter.format_root(roots)
        root_count = len(roots)
        if root_count == 1:
            for item in graph.walk(graph.subcats, roots[0]):
                formatter.format_row(item)
        elif root_count == 2:
            for pair in graph.compare_components(graph.subcats, *roots):
                formatter.format_row(*pair)
        else:
            logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
            continue

        if self.cliargs.print:
            print("== {} ==\n".format(title))
            print(formatter)
        elif self.cliargs.save:
            toc.contents = str(formatter)
            if self.cliargs.force or page.is_old_enough(
                    min_interval=datetime.timedelta(days=1), strip_time=True):
                try:
                    page.save(self.cliargs.summary)
                except APIError:
                    pass
            else:
                logger.info("The page [[{}]] has already been updated this UTC day.".format(title))
class Statistics:
    """
    The whole statistics page.
    """
    def __init__(self, api, cliargs):
        # api: wiki API connection; cliargs: parsed CLI options
        self.api = api
        self.cliargs = cliargs

    @staticmethod
    def set_argparser(argparser):
        """Register this script's command-line options on *argparser*."""
        # first try to set options for objects we depend on
        present_groups = [group.title for group in argparser._action_groups]
        if "Connection parameters" not in present_groups:
            API.set_argparser(argparser)

        output = argparser.add_argument_group(title="output")
        # TODO: maybe leave only the short option to forbid configurability in config file
        output.add_argument('-s', '--save', action='store_true',
                            help='try to save the page (requires being logged in)')
        # FIXME: -c conflicts with -c/--config
        # output.add_argument('-c', '--clipboard', action='store_true',
        output.add_argument('--clipboard', action='store_true',
                            help='try to store the updated text in the clipboard')
        output.add_argument('-p', '--print', action='store_true',
                            help='print the updated text in the standard output '
                                 '(this is the default output method)')

        usstats = argparser.add_argument_group(title="user statistics")
        usstats.add_argument('--us-days-span', action='store', default=30,
                             type=int, dest='us_days', metavar='N',
                             help='the time span in days (default: %(default)s)')
        usstats.add_argument('--us-min-tot-edits', action='store', default=1000,
                             type=int, dest='us_mintotedits', metavar='N',
                             help='minimum total edits for users with not enough '
                                  'recent changes (default: %(default)s)')
        usstats.add_argument('--us-min-rec-edits', action='store', default=1,
                             type=int, dest='us_minrecedits', metavar='N',
                             help='minimum recent changes for users with not enough '
                                  'total edits (default: %(default)s)')

        # TODO: main group for "script parameters" would be most logical, but
        # but argparse does not display nested groups in the help page
        group = argparser.add_argument_group(title="other parameters")
        group.add_argument('-a', '--anonymous', action='store_true',
                           help='do not require logging in: queries may be limited to '
                                'a lower rate')
        # TODO: maybe leave only the short option to forbid configurability in config file
        group.add_argument('-f', '--force', action='store_true',
                           help='try to update the page even if it was last saved in '
                                'the same UTC day')
        group.add_argument('--statistics-page', default='ArchWiki:Statistics',
                           help='the page name on the wiki to fetch and update '
                                '(default: %(default)s)')
        # TODO: no idea how to forbid setting this globally in the config...
        group.add_argument('--summary', default='automatic update',
                           help='the edit summary to use when saving the page '
                                '(default: %(default)s)')

    @classmethod
    def from_argparser(klass, args, api=None):
        """Construct an instance from parsed command-line arguments.

        An existing *api* object may be passed in to avoid rebuilding it.
        """
        if api is None:
            api = API.from_argparser(args)
        return klass(api, args)

    def run(self):
        """Fetch, recompose and output the statistics page.

        Returns 1 when the page is missing or was already updated this UTC
        day; otherwise returns the result of :meth:`_output_page`.
        """
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                         "created manually before the script can update it."
                         .format(self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(
                min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1

    def _compose_page(self):
        # regenerate the user-statistics section of self.page in place
        userstats = _UserStats(self.api, self.cliargs.cache_dir, self.page,
                               self.cliargs.us_days,
                               self.cliargs.us_mintotedits,
                               self.cliargs.us_minrecedits)
        userstats.update()

    def _output_page(self):
        """Deliver the updated page per the selected output action(s).

        Internal status bits: bit 1 (value 2) records that at least one
        output method succeeded, bit 0 (value 1) records a failure.
        Returns ``ret & 1``, i.e. 0 on full success and 1 otherwise.
        """
        ret = 0
        if self.cliargs.save:
            require_login(self.api)
            try:
                self.page.save(self.cliargs.summary, minor="1")
                logger.info("The page has been saved: do not forget to "
                            "double-check the diff")
                ret |= 2
            except APIError as err:
                # FIX: the APIError used to be swallowed silently; log it so
                # the user knows why the save failed (best-effort behavior kept)
                logger.error("Failed to save the page: {}".format(err))
                ret |= 1
        # NOTE(review): the original condition was
        # ``self.cliargs.clipboard or ret is False`` — ``ret`` is always an
        # int here, so ``ret is False`` could never be true; the dead operand
        # has been dropped without changing behavior.
        if self.cliargs.clipboard:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.page.wikicode)
                # The copied text is lost once the script terminates
                input("The updated page text has been copied to the clipboard: "
                      "paste it in the browser, then press Enter to continue")
                w.destroy()
                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1
        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.page.wikicode)
        return ret & 1