Example #1
    def run(self):
        require_login(self.api)

        # synchronize the database
        self.db.sync_with_api(self.api)

        try:
            page = AutoPage(self.api, self.pagename)
        except ValueError:
            logger.error(
                "The page [[{}]] currently does not exist. It must be "
                "created manually before the script can update it.".format(
                    self.pagename))
            return

        tables = page.wikicode.filter_tags(
            matches=lambda node: node.tag == "table",
            recursive=page.wikicode.RECURSE_OTHERS)
        assert len(tables) == 2
        table_active, table_inactive = tables

        # extract rows
        rows = self.extract_rows(table_active)
        rows += self.extract_rows(table_inactive)

        # sort
        def sort_key(row):
            return self._get_editcount(row), self._get_last_edit_timestamp(row)

        rows.sort(key=sort_key, reverse=True)

        # split
        rows_active = [
            row for row in rows if self._get_editcount(row) >= self.min_edits
        ]
        rows_inactive = [
            row for row in rows if self._get_editcount(row) < self.min_edits
        ]

        # assemble
        for row in rows_active:
            table_active.contents.append(row)
        for row in rows_inactive:
            table_inactive.contents.append(row)

        # save
        page.save(self.edit_summary, minor="1")
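All of these run() methods share the same fetch-modify-save idiom around AutoPage. Below is a minimal standalone sketch of that idiom, assuming AutoPage and an already-configured api object are available; the page title is hypothetical and only calls that appear in the examples are used.

import datetime
import logging

logger = logging.getLogger(__name__)

def update_example_page(api, pagename="Sandbox"):
    # AutoPage raises ValueError when the target page does not exist yet
    try:
        page = AutoPage(api, pagename)
    except ValueError:
        logger.error("The page [[%s]] does not exist.", pagename)
        return 1

    # throttle to at most one edit per UTC day, as Example #2 does
    if not page.is_old_enough(min_interval=datetime.timedelta(days=1),
                              strip_time=True):
        logger.info("The page has already been updated this UTC day")
        return 1

    # page.wikicode is a mwparserfromhell tree; modify it in place
    page.wikicode.append("\n<!-- updated by the script -->")

    page.save("automatic update", minor="1")
    return 0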
Example #2
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error(
                "The page [[{}]] currently does not exist. It must be "
                "created manually before the script can update it.".format(
                    self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(
                min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1
Example #3
    def run(self):
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d")
        basename = os.path.join(self.report_dir,
                                "update-pkgs-{}.report".format(timestamp))

        if self.report_page:
            page = AutoPage(self.api, self.report_page)
            div = page.get_tag_by_id("div", "wiki-scripts-archpkgs-report")
            if not page.is_old_enough(datetime.timedelta(days=7),
                                      strip_time=True):
                logger.info(
                    "The report page on the wiki has already been updated in the past 7 days, skipping today's update."
                )
                return

        try:
            self.check_allpages()
        except (KeyboardInterrupt, InteractiveQuit):
            print()
            mwreport = self.get_report_wikitext()
            self.save_report_to_json(mwreport, basename)
            raise

        mwreport = self.get_report_wikitext()
        self.save_report_to_json(mwreport, basename)
        if self.report_page:
            div.contents = mwreport
            try:
                page.save("automatic update", self.interactive)
                logger.info(
                    "Saved report to the [[{}]] page on the wiki.".format(
                        self.report_page))
            except APIError:
                self.save_report_to_file(mwreport, basename)
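Example #3 updates only a marked <div> inside the report page instead of rewriting the whole page. A short sketch of that idiom follows, with a hypothetical page name and tag id, using only the AutoPage calls shown in these examples.

def update_report_div(api, report_wikitext,
                      pagename="Hypothetical report page",
                      div_id="wiki-scripts-example-report"):
    # AutoPage raises ValueError if the page has not been created yet
    page = AutoPage(api, pagename)
    # locate the placeholder tag that the script is allowed to overwrite;
    # get_tag_by_id returns None when the placeholder is missing (see Example #7)
    div = page.get_tag_by_id("div", div_id)
    if div is None:
        raise ValueError("missing <div id=\"{}\"> on [[{}]]".format(div_id, pagename))
    # replace only the div body; the rest of the page is left untouched
    div.contents = report_wikitext
    page.save("automatic update")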
Example #4
    def run(self):
        require_login(self.api)

        try:
            page = AutoPage(self.api, self.pagename)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                  "created manually before the script can update it."
                  .format(self.pagename))
            return

        tables = page.wikicode.filter_tags(matches=lambda node: node.tag == "table", recursive=page.wikicode.RECURSE_OTHERS)
        assert len(tables) == 2
        table_active, table_inactive = tables

        # extract rows
        rows = self.extract_rows(table_active)
        rows += self.extract_rows(table_inactive)

        # sort
        def sort_key(row):
            return self._get_editcount(row), self._get_last_edit_timestamp(row)
        rows.sort(key=sort_key, reverse=True)

        # split
        rows_active = [row for row in rows if self._get_editcount(row) >= self.min_edits]
        rows_inactive = [row for row in rows if self._get_editcount(row) < self.min_edits]

        # assemble
        for row in rows_active:
            table_active.contents.append(row)
        for row in rows_inactive:
            table_inactive.contents.append(row)

        # save
        page.save(self.edit_summary, minor="1")
Example #5
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                  "created manually before the script can update it."
                  .format(self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1
Example #6
class Statistics:
    """
    The whole statistics page.
    """
    def __init__(self, api, db, cliargs):
        self.api = api
        self.db = db
        self.cliargs = cliargs

    @staticmethod
    def set_argparser(argparser):
        # first try to set options for objects we depend on
        present_groups = [group.title for group in argparser._action_groups]
        if "Connection parameters" not in present_groups:
            API.set_argparser(argparser)
        if "Database parameters" not in present_groups:
            Database.set_argparser(argparser)

        output = argparser.add_argument_group(title="output")
        # TODO: maybe leave only the short option to forbid configurability in config file
        output.add_argument(
            '-s',
            '--save',
            action='store_true',
            help='try to save the page (requires being logged in)')
        # FIXME: -c conflicts with -c/--config
        #        output.add_argument('-c', '--clipboard', action='store_true',
        output.add_argument(
            '--clipboard',
            action='store_true',
            help='try to store the updated text in the clipboard')
        output.add_argument(
            '-p',
            '--print',
            action='store_true',
            help='print the updated text in the standard output '
            '(this is the default output method)')

        usstats = argparser.add_argument_group(title="user statistics")
        usstats.add_argument(
            '--us-days-span',
            action='store',
            default=30,
            type=int,
            dest='us_days',
            metavar='N',
            help='the time span in days (default: %(default)s)')
        usstats.add_argument(
            '--us-min-tot-edits',
            action='store',
            default=1000,
            type=int,
            dest='us_mintotedits',
            metavar='N',
            help='minimum total edits for users with not enough '
            'recent changes (default: %(default)s)')
        usstats.add_argument(
            '--us-min-rec-edits',
            action='store',
            default=1,
            type=int,
            dest='us_minrecedits',
            metavar='N',
            help='minimum recent changes for users with not enough '
            'total edits (default: %(default)s)')

        # TODO: main group for "script parameters" would be most logical, but
        #       argparse does not display nested groups in the help page
        group = argparser.add_argument_group(title="other parameters")

        group.add_argument(
            '-a',
            '--anonymous',
            action='store_true',
            help='do not require logging in: queries may be limited to '
            'a lower rate')
        # TODO: maybe leave only the short option to forbid configurability in config file
        group.add_argument(
            '-f',
            '--force',
            action='store_true',
            help='try to update the page even if it was last saved in '
            'the same UTC day')
        group.add_argument(
            '--statistics-page',
            default='ArchWiki:Statistics',
            help='the page name on the wiki to fetch and update '
            '(default: %(default)s)')
        # TODO: no idea how to forbid setting this globally in the config...
        group.add_argument('--summary',
                           default='automatic update',
                           help='the edit summary to use when saving the page '
                           '(default: %(default)s)')

    @classmethod
    def from_argparser(klass, args, api=None, db=None):
        if api is None:
            api = API.from_argparser(args)
        if db is None:
            db = Database.from_argparser(args)
        return klass(api, db, args)

    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        # synchronize the database
        self.db.sync_with_api(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error(
                "The page [[{}]] currently does not exist. It must be "
                "created manually before the script can update it.".format(
                    self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(
                min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1

    def _compose_page(self):
        userstats = _UserStats(self.api, self.db, self.page,
                               self.cliargs.us_days,
                               self.cliargs.us_mintotedits,
                               self.cliargs.us_minrecedits)
        userstats.update()

    def _output_page(self):
        ret = 0

        if self.cliargs.save:
            require_login(self.api)

            try:
                self.page.save(self.cliargs.summary, minor="1")
                logger.info("The page has been saved: do not forget to "
                            "double-check the diff")
                ret |= 2
            except APIError as err:
                ret |= 1

        if self.cliargs.clipboard or ret is False:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.page.wikicode)
                # The copied text is lost once the script terminates
                input(
                    "The updated page text has been copied to the clipboard: "
                    "paste it in the browser, then press Enter to continue")
                w.destroy()

                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1

        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.page.wikicode)

        return ret & 1
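The Statistics class above exposes set_argparser(), from_argparser() and run() as its entry-point hooks. The following is a hedged sketch of how such a class could be driven from a plain argparse main(), using only those hooks; the actual wiki-scripts launcher (with its config-file handling) may wire this up differently.

import argparse
import sys

def main():
    argparser = argparse.ArgumentParser(description="update the statistics page")
    Statistics.set_argparser(argparser)      # adds connection, database and output groups
    args = argparser.parse_args()

    stats = Statistics.from_argparser(args)  # builds the API and Database objects from args
    sys.exit(stats.run())                    # run() returns a shell-style exit status

if __name__ == "__main__":
    main()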
Example #7
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        # if we are going to save, make sure that the categories are correct first
        if self.cliargs.save is True:
            cat = Categorization(self.api)
            cat.fix_allpages()
            decat = Decategorization(self.api)
            decat.fix_allpages()

        # build category graph
        graph = CategoryGraph(self.api)

        # if we are going to save, init wanted categories
        if self.cliargs.save is True:
            graph.init_wanted_categories()

        # detect target pages, fetch content at once
        page = AutoPage(self.api, fetch_titles=self.titles)

        for title in self.titles:
            try:
                page.set_title(title)
            except ValueError:
                # page not fetched
                continue

            toc_table = page.get_tag_by_id(tag="table",
                                           id="wiki-scripts-toc-table")
            columns, category_names, alsoin = self.parse_toc_table(
                title, toc_table)

            if toc_table is None:
                if self.cliargs.save is True:
                    logger.error(
                        "The wiki page [[{}]] does not contain the ToC table. "
                        "Create the following entry point manually:\n"
                        "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(
                            title))
                    continue
                else:
                    logger.warning(
                        "The wiki page [[{}]] does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            if self.cliargs.print:
                ff = PlainFormatter(graph.parents, graph.info, category_names,
                                    alsoin)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph.parents,
                                        graph.info,
                                        category_names,
                                        alsoin,
                                        include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(
                    self.cliargs.save))

            roots = [
                "Category:{}".format(lang.langname_for_tag(c)) for c in columns
            ]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in graph.walk(graph.subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in graph.compare_components(graph.subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error(
                    "Cannot compare more than 2 languages at once. Requested: {}"
                    .format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                if self.cliargs.force or page.is_old_enough(
                        min_interval=datetime.timedelta(days=1),
                        strip_time=True):
                    try:
                        page.save(self.cliargs.summary)
                    except APIError:
                        pass
                else:
                    logger.info(
                        "The page [[{}]] has already been updated this UTC day."
                        .format(title))
Example #8
    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        # if we are going to save, make sure that the categories are correct first
        if self.cliargs.save is True:
            cat = Categorization(self.api)
            cat.fix_allpages()
            decat = Decategorization(self.api)
            decat.fix_allpages()

        # build category graph
        graph = CategoryGraph(self.api)

        # if we are going to save, init wanted categories
        if self.cliargs.save is True:
            graph.init_wanted_categories()

        # detect target pages, fetch content at once
        page = AutoPage(self.api, fetch_titles=self.titles)

        for title in self.titles:
            try:
                page.set_title(title)
            except ValueError:
                # page not fetched
                continue

            toc_table = page.get_tag_by_id(tag="table", id="wiki-scripts-toc-table")
            columns, category_names, alsoin = self.parse_toc_table(title, toc_table)

            if toc_table is None:
                if self.cliargs.save is True:
                    logger.error(
                        "The wiki page [[{}]] does not contain the ToC table. "
                        "Create the following entry point manually:\n"
                        "{{| id=\"wiki-scripts-toc-table\"\n...\n|}}".format(title))
                    continue
                else:
                    logger.warning(
                        "The wiki page [[{}]] does not contain the ToC table, "
                        "so there will be no translations.".format(title))

            if self.cliargs.print:
                ff = PlainFormatter(graph.parents, graph.info, category_names, alsoin)
            elif self.cliargs.save:
                ff = MediaWikiFormatter(graph.parents, graph.info, category_names, alsoin, include_opening_closing_tokens=False)
            else:
                raise NotImplementedError("unknown output action: {}".format(self.cliargs.save))

            roots = ["Category:{}".format(lang.langname_for_tag(c)) for c in columns]
            ff.format_root(roots)
            if len(roots) == 1:
                for item in graph.walk(graph.subcats, roots[0]):
                    ff.format_row(item)
            elif len(roots) == 2:
                for result in graph.compare_components(graph.subcats, *roots):
                    ff.format_row(*result)
            else:
                logger.error("Cannot compare more than 2 languages at once. Requested: {}".format(columns))
                continue

            if self.cliargs.print:
                print("== {} ==\n".format(title))
                print(ff)
            elif self.cliargs.save:
                toc_table.contents = str(ff)
                if self.cliargs.force or page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
                    try:
                        page.save(self.cliargs.summary)
                    except APIError:
                        pass
                else:
                    logger.info("The page [[{}]] has already been updated this UTC day.".format(title))
Example #9
class Statistics:
    """
    The whole statistics page.
    """
    def __init__(self, api, cliargs):
        self.api = api
        self.cliargs = cliargs

    @staticmethod
    def set_argparser(argparser):
        # first try to set options for objects we depend on
        present_groups = [group.title for group in argparser._action_groups]
        if "Connection parameters" not in present_groups:
            API.set_argparser(argparser)

        output = argparser.add_argument_group(title="output")
        # TODO: maybe leave only the short option to forbid configurability in config file
        output.add_argument('-s', '--save', action='store_true',
                        help='try to save the page (requires being logged in)')
        # FIXME: -c conflicts with -c/--config
#        output.add_argument('-c', '--clipboard', action='store_true',
        output.add_argument('--clipboard', action='store_true',
                        help='try to store the updated text in the clipboard')
        output.add_argument('-p', '--print', action='store_true',
                        help='print the updated text in the standard output '
                        '(this is the default output method)')

        usstats = argparser.add_argument_group(title="user statistics")
        usstats.add_argument('--us-days-span', action='store', default=30,
                    type=int, dest='us_days', metavar='N',
                    help='the time span in days (default: %(default)s)')
        usstats.add_argument('--us-min-tot-edits', action='store',
                    default=1000, type=int, dest='us_mintotedits', metavar='N',
                    help='minimum total edits for users with not enough '
                    'recent changes (default: %(default)s)')
        usstats.add_argument('--us-min-rec-edits', action='store',
                    default=1, type=int, dest='us_minrecedits', metavar='N',
                    help='minimum recent changes for users with not enough '
                    'total edits (default: %(default)s)')

        # TODO: main group for "script parameters" would be most logical, but
        #       argparse does not display nested groups in the help page
        group = argparser.add_argument_group(title="other parameters")

        group.add_argument('-a', '--anonymous', action='store_true',
                    help='do not require logging in: queries may be limited to '
                    'a lower rate')
        # TODO: maybe leave only the short option to forbid configurability in config file
        group.add_argument('-f', '--force', action='store_true',
                    help='try to update the page even if it was last saved in '
                    'the same UTC day')
        group.add_argument('--statistics-page', default='ArchWiki:Statistics',
                    help='the page name on the wiki to fetch and update '
                    '(default: %(default)s)')
        # TODO: no idea how to forbid setting this globally in the config...
        group.add_argument('--summary', default='automatic update',
                    help='the edit summary to use when saving the page '
                    '(default: %(default)s)')

    @classmethod
    def from_argparser(klass, args, api=None):
        if api is None:
            api = API.from_argparser(args)
        return klass(api, args)

    def run(self):
        if not self.cliargs.anonymous:
            require_login(self.api)

        try:
            self.page = AutoPage(self.api, self.cliargs.statistics_page)
        except ValueError:
            logger.error("The page [[{}]] currently does not exist. It must be "
                  "created manually before the script can update it."
                  .format(self.cliargs.statistics_page))
            return 1

        if self.cliargs.force or self.page.is_old_enough(min_interval=datetime.timedelta(days=1), strip_time=True):
            self._compose_page()
            return self._output_page()
        else:
            logger.info("The page has already been updated this UTC day")
            return 1

    def _compose_page(self):
        userstats = _UserStats(self.api, self.cliargs.cache_dir, self.page,
                    self.cliargs.us_days, self.cliargs.us_mintotedits,
                    self.cliargs.us_minrecedits)
        userstats.update()

    def _output_page(self):
        ret = 0

        if self.cliargs.save:
            require_login(self.api)

            try:
                self.page.save(self.cliargs.summary, minor="1")
                logger.info("The page has been saved: do not forget to "
                            "double-check the diff")
                ret |= 2
            except APIError as err:
                ret |= 1

        if self.cliargs.clipboard or ret is False:
            if Tk:
                w = Tk()
                w.withdraw()
                w.clipboard_clear()
                w.clipboard_append(self.page.wikicode)
                # The copied text is lost once the script terminates
                input("The updated page text has been copied to the clipboard: "
                      "paste it in the browser, then press Enter to continue")
                w.destroy()

                ret |= 2
            else:
                logger.error("It has not been possible to copy the updated "
                             "text to the clipboard")
                ret |= 1

        # If no other action was chosen, always print the output, so that all
        # the effort doesn't go wasted
        if self.cliargs.print or ret == 0:
            print(self.page.wikicode)

        return ret & 1
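For reference, the bit mask built in _output_page() above resolves to a shell-style status as follows; this only restates the arithmetic already in the code.

# ret |= 2  marks that an output action (save or clipboard) succeeded
# ret |= 1  marks that an output action failed
# ret & 1   is the value returned to run(): 0 unless something failed
assert (0 | 2) & 1 == 0      # save succeeded                -> status 0
assert (0 | 1) & 1 == 1      # save failed                   -> status 1
assert (0 | 2 | 1) & 1 == 1  # clipboard ok but save failed  -> status 1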