Пример #1
0
class BatchProgram(Program):
    """Subprogram that loads a batch file and executes the batch it contains."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "batch"
        aliases = ("b",)
        description = "Execute previously created batch of requests."

    settings: NastySettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    # File the batch is loaded from in run().
    batch_file: Path = Argument(
        group=_BATCH_ARGUMENT_GROUP,
        alias="batch-file",
        short_alias="b",
        metavar="FILE",
        description="Batch file to which requests have been appended.",
    )

    # Directory handed to Batch.execute() in run().
    results_dir: Path = Argument(
        group=_BATCH_ARGUMENT_GROUP,
        alias="results-dir",
        short_alias="r",
        metavar="DIR",
        description="Directory to which results will be written.",
    )

    @overrides
    def run(self) -> None:
        """Load the batch from ``batch_file`` and execute it with ``results_dir``."""
        pending = Batch()
        pending.load(self.batch_file)
        pending.execute(self.results_dir)
Пример #2
0
class _ExportProgram(Program):
    """Subprogram that exports the part of a dataset matching a query."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "export"
        aliases = ("e",)
        description = "Export a dataset subset to CSV."

    settings: NastyAnalysisSettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    # NOTE(review): this argument is placed in the *index* argument group even
    # though the sibling arguments use the export group -- confirm intended.
    dataset: Optional[str] = Argument(
        group=_INDEX_ARGUMENT_GROUP,
        short_alias="d",
        metavar="NAME",
        description="Name of the dataset.",
    )
    query: str = Argument(
        group=_EXPORT_ARGUMENT_GROUP,
        short_alias="q",
        description="Elasticsearch query string for the exported subset.",
    )
    output: Path = Argument(
        group=_EXPORT_ARGUMENT_GROUP,
        short_alias="o",
        metavar="FILE",
        description="CSV-File to which the output will be written.",
    )

    @overrides
    def run(self) -> None:
        """Build the dataset, set up the Elasticsearch connection, then export.

        The dataset object comes from ``_make_dataset``; ``query`` and
        ``output`` are forwarded unchanged to its ``export`` method.
        """
        selected_dataset = _make_dataset(self.settings, self.dataset)
        self.settings.setup_elasticsearch_connection()
        selected_dataset.export(self.query, self.output)
Пример #3
0
class RequestProgram(Program):
    """Common base for subprograms that build and run a single request.

    Subclasses implement ``_build_request``. ``run`` then either appends the
    request to a batch file (when ``to_batch`` is given) or executes it and
    writes each resulting tweet to stdout as one JSON document per line.
    """

    max_tweets: Optional[int] = Argument(
        100,
        group=_REQUEST_ARGUMENT_GROUP,
        alias="max-tweets",
        short_alias="n",
        metavar="N",
        description=(
            "Maximum number of tweets to retrieve. Set to -1 to receive as many as "
            "possible. Defaults to 100."
        ),
    )

    @validator("max_tweets")
    def _max_tweets_validator(cls, v: Optional[int]) -> Optional[int]:  # noqa: N805
        """Map the command line sentinel ``-1`` to ``None``; keep other values."""
        if v == -1:
            return None
        return v

    batch_size: int = Argument(
        -1,
        group=_REQUEST_ARGUMENT_GROUP,
        alias="batch-size",
        short_alias="i",
        metavar="N",
        description=(
            "Batch size to retrieve Tweets in. Set to -1 for default behavior. Only "
            "change when necessary."
        ),
    )

    @validator("batch_size")
    def _batch_size_validator(cls, v: int) -> int:  # noqa: N805
        """Map the command line sentinel ``-1`` to ``DEFAULT_BATCH_SIZE``."""
        if v == -1:
            return DEFAULT_BATCH_SIZE
        return v

    to_batch: Optional[Path] = Argument(
        group=_BATCH_ARGUMENT_GROUP,
        alias="to-batch",
        short_alias="b",
        metavar="FILE",
        description="Append request to batch file instead of executing.",
    )

    @overrides
    def run(self) -> None:
        """Build the request and either batch it or execute it immediately."""
        request = self._build_request()

        if not self.to_batch:
            # Immediate mode: stream every tweet to stdout as a JSON line.
            for tweet in request.request():
                sys.stdout.write(json.dumps(tweet.to_json()) + "\n")
            return

        # Batch mode: extend an existing batch file or start a fresh batch.
        target_batch = Batch()
        if self.to_batch.exists():
            target_batch.load(self.to_batch)
        self._batch_submit(target_batch, request)
        target_batch.dump(self.to_batch)

    def _build_request(self) -> Request:
        """Return the request described by the arguments (subclass hook)."""
        raise NotImplementedError()

    def _batch_submit(self, batch: Batch, request: Request) -> None:
        """Add ``request`` to ``batch``; subclasses may override this."""
        batch.append(request)
Пример #4
0
class ArgParsingProgram(Program):
    """Program declaring a mix of plain fields and ``Argument`` fields."""

    class Config(ProgramConfig):
        # Program metadata (title, version and help description).
        title = "myprog"
        version = "0.0.0"
        description = "My program description."

    # Plain annotated field with a default value.
    foo: str = "foo"
    # Defaulted argument with a short alias and a description.
    bar: int = Argument(
        5,
        group=_MY_GROUP,
        short_alias="b",
        description="my arg desc",
    )
    # Argument without a default, exposed under a custom alias and metavar.
    baz: int = Argument(
        group=_MY_GROUP,
        alias="baz-alias",
        metavar="VALUE",
    )
    # Plain annotated field without a default value.
    qux: bool
Пример #5
0
class UnidifyProgram(Program):
    """Subprogram that turns Tweet-IDs back into full Tweets.

    With ``in_dir`` it works on a directory of batch results; without it,
    Tweet-IDs are read line by line from stdin and Tweets are written to
    stdout as JSON lines.
    """

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "unidify"
        aliases = ("u", "unid")
        description = (
            "Collect full Tweet information from Tweet-IDs (via official Twitter API)."
        )

    settings: NastySettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    in_dir: Optional[Path] = Argument(
        group=_UNIDIFY_ARGUMENT_GROUP,
        alias="in-dir",
        short_alias="i",
        metavar="DIR",
        description="Directory with idified batch results.",
    )

    out_dir: Optional[Path] = Argument(
        group=_UNIDIFY_ARGUMENT_GROUP,
        alias="out-dir",
        short_alias="o",
        metavar="DIR",
        description=(
            "Directory to which unidified batch results will be written. If not "
            "given, will use input directory."
        ),
    )

    @validator("out_dir")
    def _out_dir_validator(  # noqa: N805
        cls, v: Optional[Path], values: Mapping[str, object]
    ) -> Optional[Path]:
        """Reject an output directory that is given without an input directory.

        Raises:
            ValueError: If ``v`` is set while ``values["in_dir"]`` is falsy.
        """
        if not v:
            return v
        if values["in_dir"]:
            return v
        raise ValueError("-o/--out-dir requires -i/--in-dir.")

    @overrides
    def run(self) -> None:
        """Unidify a results directory, or Tweet-IDs streamed via stdin."""
        if not self.in_dir:
            # Stream mode: one Tweet-ID per stdin line, one JSON Tweet per
            # stdout line; lookups that yield None are skipped.
            tweet_ids = (TweetId(line.strip()) for line in sys.stdin)
            for tweet in statuses_lookup(tweet_ids, self.settings.twitter_api):
                if tweet is None:
                    continue
                sys.stdout.write(json.dumps(tweet.to_json()) + "\n")
            return

        # Directory mode: results go to out_dir, falling back to in_dir.
        results = BatchResults(self.in_dir)
        results.unidify(self.settings.twitter_api, self.out_dir or self.in_dir)
Пример #6
0
class MyProgram(Program):
    """Demo program that emits log messages around nested progress bars."""

    class Config(ProgramConfig):
        # Program metadata (title, version and help description).
        title = "myprog"
        version = nasty_utils.__version__
        description = "Description of my program."

    arg: int = Argument(
        0,
        description="Description of my arg.",
        short_alias="a",
    )
    settings: LoggingSettings

    @overrides
    def run(self) -> None:
        """Log before/after two nested tqdm loops, then once per log level."""
        _LOGGER.debug("before")

        epochs = cast(Iterator[int], range(3))
        for epoch in tqdm(epochs, desc="Epoch"):
            _LOGGER.debug(f"foo {epoch} bar")
            # The inner bar is created with leave=False.
            batches = tqdm(
                cast(Iterator[int], range(30)),
                desc=f"Batch {epoch}",
                leave=False,
            )
            for _ in batches:
                sleep(0.01)

        _LOGGER.debug("after")
        # The format arguments are passed to the logger, not pre-formatted.
        _LOGGER.info("arg: '{}' {{{}}}", self.arg, bool(self.arg))

        # One message per log level.
        _LOGGER.debug("debug")
        _LOGGER.info("info")
        _LOGGER.warning("warning")
        _LOGGER.error("error")
        _LOGGER.critical("critical")
Пример #7
0
class IdifyProgram(Program):
    """Subprogram that reduces Tweets to their IDs.

    With ``in_dir`` it works on a directory of batch results; without it,
    JSON Tweets are read line by line from stdin and their IDs are written
    to stdout, one per line.
    """

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "idify"
        aliases = ("i", "id")
        description = "Reduce Tweet-collection to Tweet-IDs (for publishing)."

    settings: NastySettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    in_dir: Optional[Path] = Argument(
        group=_IDIFY_ARGUMENT_GROUP,
        alias="in-dir",
        short_alias="i",
        metavar="DIR",
        description="Directory with results of a batch of requests.",
    )

    out_dir: Optional[Path] = Argument(
        group=_IDIFY_ARGUMENT_GROUP,
        alias="out-dir",
        short_alias="o",
        metavar="DIR",
        description=(
            "Directory to which Tweet-IDs will be written. If not given, will use "
            "input directory."
        ),
    )

    @validator("out_dir")
    def _out_dir_validator(  # noqa: N805
        cls, v: Optional[Path], values: Mapping[str, object]
    ) -> Optional[Path]:
        """Reject an output directory that is given without an input directory.

        Raises:
            ValueError: If ``v`` is set while ``values["in_dir"]`` is falsy.
        """
        if not v:
            return v
        if values["in_dir"]:
            return v
        raise ValueError("-o/--out-dir requires -i/--in-dir.")

    @overrides
    def run(self) -> None:
        """Idify a results directory, or JSON Tweets streamed via stdin."""
        if not self.in_dir:
            # Stream mode: parse each stdin line as a Tweet and emit its ID.
            for raw_tweet in sys.stdin:
                tweet = Tweet(json.loads(raw_tweet))
                sys.stdout.write(str(tweet.id) + "\n")
            return

        # Directory mode: IDs go to out_dir, falling back to in_dir.
        results = BatchResults(self.in_dir)
        results.idify(self.out_dir or self.in_dir)
Пример #8
0
class MultipleSettingsProgram(Program):
    """Program with two settings fields, one plain and one via ``Argument``."""

    foo: FooSettings
    bar: BarSettings = Argument(
        description="Overwrite bar setting path.",
        short_alias="b",
    )

    @overrides
    def run(self) -> None:
        """Print the product of ``foo.n`` and ``bar.m``."""
        product = self.foo.n * self.bar.m
        print(product)  # noqa: T001
Пример #9
0
class _RetrieveProgram(Program):
    """Subprogram that retrieves a dataset."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "retrieve"
        aliases = ("r",)
        description = "Retrieve a dataset."

    settings: NastyAnalysisSettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    dataset: Optional[str] = Argument(
        group=_RETRIEVE_ARGUMENT_GROUP,
        short_alias="d",
        metavar="NAME",
        description="Name of the dataset.",
    )

    @overrides
    def run(self) -> None:
        """Build the dataset via ``_make_dataset`` and call its ``retrieve``."""
        selected_dataset = _make_dataset(self.settings, self.dataset)
        selected_dataset.retrieve()
Пример #10
0
class _IndexProgram(Program):
    """Subprogram that indexes a dataset."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "index"
        aliases = ("i",)
        description = "Index a dataset into Elasticsearch."

    settings: NastyAnalysisSettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    dataset: Optional[str] = Argument(
        group=_INDEX_ARGUMENT_GROUP,
        short_alias="d",
        metavar="NAME",
        description="Name of the dataset.",
    )

    @overrides
    def run(self) -> None:
        """Build the dataset, set up the Elasticsearch connection, then index."""
        selected_dataset = _make_dataset(self.settings, self.dataset)
        self.settings.setup_elasticsearch_connection()
        selected_dataset.index()
Пример #11
0
class RepliesProgram(RequestProgram):
    """Request subprogram that builds a ``Replies`` request for one Tweet."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "replies"
        aliases = ("r",)
        description = "Retrieve all directly replying Tweets to a Tweet."

    settings: NastySettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    tweet_id: TweetId = Argument(
        group=_REPLIES_ARGUMENT_GROUP,
        alias="tweet-id",
        short_alias="t",
        metavar="ID",
        description="ID of the Tweet to retrieve replies for (required).",
    )

    @overrides
    def _build_request(self) -> Request:
        """Return a ``Replies`` request for ``tweet_id`` with the shared limits."""
        limits = {"max_tweets": self.max_tweets, "batch_size": self.batch_size}
        return Replies(self.tweet_id, **limits)
Пример #12
0
class ThreadProgram(RequestProgram):
    """Request subprogram that builds a ``Thread`` request for one Tweet."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "thread"
        aliases = ("t",)
        description = "Retrieve all Tweets threaded under a Tweet."

    settings: NastySettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    tweet_id: TweetId = Argument(
        group=_THREAD_ARGUMENT_GROUP,
        alias="tweet-id",
        short_alias="t",
        metavar="ID",
        description="ID of the Tweet to retrieve threaded Tweets for (required).",
    )

    @overrides
    def _build_request(self) -> Request:
        """Return a ``Thread`` request for ``tweet_id`` with the shared limits."""
        limits = {"max_tweets": self.max_tweets, "batch_size": self.batch_size}
        return Thread(self.tweet_id, **limits)
Пример #13
0
class _GdeltProgram(Program):
    """Subprogram that calls ``nasty_analysis.gdelt.gdelt``."""

    # TODO: integrate into RetrieveProgram.
    class Config(ProgramConfig):
        # Program metadata; the description is still a placeholder.
        title = "gdelt"
        aliases = ("g",)
        description = "TODO"

    settings: NastyAnalysisSettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    @overrides
    def run(self) -> None:
        """Import the gdelt entry point at call time and invoke it."""
        # The import is deliberately kept function-local, as in the original.
        from nasty_analysis.gdelt import gdelt as run_gdelt

        run_gdelt()
Пример #14
0
class NastyProgram(Program):
    """Top-level program that lists the subprograms below in its config."""

    class Config(ProgramConfig):
        # Program metadata plus the subprogram classes it exposes.
        title = "nasty"
        version = nasty.__version__
        description = "NASTY Advanced Search Tweet Yielder."
        subprograms = (
            # Request programs.
            SearchProgram,
            RepliesProgram,
            ThreadProgram,
            # Batch execution and Tweet-ID conversion programs.
            BatchProgram,
            IdifyProgram,
            UnidifyProgram,
        )

    settings: NastySettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )
Пример #15
0
class WallcropProgram(Program):
    """Program that opens a wallpaper image and shows it in a ``Window``."""

    class Config(ProgramConfig):
        # Program metadata (title, version and help description).
        title = "wallcrop"
        version = wallcrop.__version__
        description = "Multi-monitor wallpaper cropping tool."

    settings: WallcropSettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    @overrides
    def run(self) -> None:
        """Open the hard-coded wallpaper and run the window main loop."""
        # TODO: pydantic validation of settings.

        # The wallpaper location is a fixed relative path.
        wallpaper_path = Path("assets") / "Nordic Landscape 1125x250.png"
        with Image.open(wallpaper_path) as wallpaper:
            # Only the first configured workstation is used.
            first_workstation = self.settings.workstations[0]
            Window(first_workstation, wallpaper).mainloop()
Пример #16
0
class SearchProgram(RequestProgram):
    """Request subprogram that builds a ``Search`` request.

    In addition to the shared request arguments it accepts a query, an
    optional date range, a filter and a language. With ``daily`` the request
    is split via ``Search.to_daily_requests`` before being added to a batch.
    """

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "search"
        aliases = ("s", )
        description = "Retrieve Tweets using the Twitter advanced search."

    settings: NastySettings = Argument(
        alias="config", description="Overwrite default config file path.")

    query: str = Argument(
        short_alias="q",
        description="Search string (required).",
        group=_SEARCH_ARGUMENT_GROUP,
    )

    since: Optional[date] = Argument(
        short_alias="s",
        description="Earliest date for Tweets (inclusive) as YYYY-MM-DD.",
        metavar="DATE",
        group=_SEARCH_ARGUMENT_GROUP,
    )

    @validator("since", pre=True)
    def _since_validator(cls, v: Optional[str]) -> Optional[date]:  # noqa: N805
        """Parse a truthy raw value with ``parse_yyyy_mm_dd``; else ``None``."""
        return parse_yyyy_mm_dd(v) if v else None

    until: Optional[date] = Argument(
        short_alias="u",
        description="Latest date for Tweets (exclusive) as YYYY-MM-DD.",
        metavar="DATE",
        group=_SEARCH_ARGUMENT_GROUP,
    )

    @validator("until", pre=True, always=False)
    def _until_validator(cls, v: Optional[str]) -> Optional[date]:  # noqa: N805
        """Parse a truthy raw value with ``parse_yyyy_mm_dd``; else ``None``."""
        return parse_yyyy_mm_dd(v) if v else None

    filter_: SearchFilter = Argument(
        DEFAULT_FILTER,
        alias="filter",
        short_alias="f",
        description=
        ("Sorting/filtering of Tweets (top, latest, photos, videos). Defaults "
         "to 'top'."),
        group=_SEARCH_ARGUMENT_GROUP,
    )

    # NOTE(review): ISO 3166-1 names country codes; language codes are usually
    # ISO 639 -- confirm which one the help text should refer to.
    lang: str = Argument(
        "en",
        short_alias="l",
        description=
        ("Language for Tweets, presumably as ISO 3166-1 two or three letter codes. "
         "Defaults to 'en'."),
        group=_SEARCH_ARGUMENT_GROUP,
    )

    daily: bool = Argument(
        False,
        short_alias="d",
        description=
        ("For a request with since and until date, append one search request "
         "per day in the date-range with identical settings otherwise."),
        group=_BATCH_ARGUMENT_GROUP,
    )

    @validator("daily")
    def _daily_validator(cls, v: bool, values: Mapping[str, object]) -> bool:  # noqa: N805
        """Check that ``daily`` is combined with a batch file and a date range.

        Args:
            v: The validated ``daily`` flag.
            values: Previously validated fields. With pydantic-style
                validators a field that failed its own validation is absent
                from this mapping.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``daily`` is set but ``to_batch`` is falsy, or
                ``since``/``until`` is ``None``.
        """
        if not v:
            return v

        # Only check fields that are present: a field missing from `values`
        # has already produced its own validation error, and indexing it
        # directly would crash with a KeyError instead of reporting that error.
        if "to_batch" in values and not values["to_batch"]:
            raise ValueError("-d/--daily requires -b/--to-batch.")
        if any(
            name in values and values[name] is None
            for name in ("since", "until")
        ):
            raise ValueError("-d/--daily requires -s/--since and -u/--until.")
        return v

    @overrides
    def _build_request(self) -> Search:
        """Return a ``Search`` request built from this program's arguments."""
        return Search(
            self.query,
            since=self.since,
            until=self.until,
            filter_=self.filter_,
            lang=self.lang,
            max_tweets=self.max_tweets,
            batch_size=self.batch_size,
        )

    @overrides
    def _batch_submit(self, batch: Batch, request: Request) -> None:
        """Add ``request`` to ``batch``, split into daily requests if asked."""
        request = checked_cast(Search, request)
        if self.daily:
            for daily_request in request.to_daily_requests():
                super()._batch_submit(batch, daily_request)
        else:
            super()._batch_submit(batch, request)
Пример #17
0
class _ServeProgram(Program):
    """Subprogram that starts a Bokeh server for the visualization app."""

    class Config(ProgramConfig):
        # Program metadata (title, aliases and help description).
        title = "serve"
        aliases = ("s",)
        description = "Start Bokeh visualization server."

    settings: NastyAnalysisSettings = Argument(
        description="Overwrite default config file path.",
        alias="config",
    )

    show: bool = Argument(
        False,
        group=_SERVE_ARGUMENTS_GROUP,
        short_alias="s",
        description="Open visualization server in a browser after startup.",
    )
    # NOTE(review): the *short* alias is a whole word here -- confirm this is
    # intended rather than `alias="develop"`.
    develop: bool = Argument(
        False,
        group=_SERVE_ARGUMENTS_GROUP,
        short_alias="develop",
        description="Run in development mode (autoreload scripts).",
    )

    @overrides
    def run(self) -> None:
        """Set up Elasticsearch, build the Bokeh server and run it until shutdown.

        In development mode a single process with autoreload is used, and the
        settings file plus all ``*.js`` files below the ``nasty_analysis``
        package directory are passed to ``watch``.
        """
        self.settings.setup_elasticsearch_connection()

        # The following is a simpler `bokeh serve src/nasty_analysis/visualization`.
        # Code for that is in `bokeh.commands.subcommands.serve.Serve.invoke`.
        # Also Bokeh provides this example:
        # https://github.com/bokeh/bokeh/blob/2.0.2/examples/howto/server_embed/standalone_embed.py

        serve_settings = self.settings.analysis.serve
        address = serve_settings.address
        port = serve_settings.port
        num_procs = self.settings.analysis.num_procs
        autoreload = False

        if self.develop:
            # Development mode overrides the configured process count.
            num_procs, autoreload = 1, True

            watch(str(self.settings.find_settings_file()))

            package_dir = Path(nasty_analysis.__file__).parent
            for script in package_dir.glob("**/*.js"):
                watch(str(script))

        handler = DirectoryHandler(filename=Path(serve.__file__).parent)
        application = ParameterPassingApplication(
            handler,
            server_context_params={"settings": self.settings},
        )
        with report_server_init_errors(address=address, port=port):
            server = Server(
                {"/": application},
                address=address,
                port=port,
                allow_websocket_origin=[f"{address}:{port}"],
                num_procs=num_procs,
                autoreload=autoreload,
            )
            server.start()

            if self.show:
                # Schedule server.show("/") on the server's IO loop.
                server.io_loop.add_callback(server.show, "/")
            server.run_until_shutdown()