def init_logger(cls, log_name: str) -> None:
    """Configure root logging with a console handler and an optional rotating file.

    The level name is read from the ``LOGGING_LEVEL`` setting; when that name
    does not resolve to a numeric level, ``logging.INFO`` is used. When the
    ``OUTPUT_LOG_FILE_ENABLED`` setting reads as true (default ``True``), the
    ``logs`` directory is created and records are also written to
    ``logs/<log_name>.log`` (rotated at 5 MiB, 3 backups, UTF-8).

    Args:
        log_name: Base name, without extension, of the log file under ``logs/``.
    """
    requested_name: str = Env.get_environment('LOGGING_LEVEL', default='INFO')
    # getLevelName() returns the number for a registered name and a string
    # for anything it does not recognise, so only an int result is accepted.
    resolved = logging.getLevelName(requested_name)
    level: int = resolved if isinstance(resolved, int) else logging.INFO

    # The timezone is stored on the class (presumably for time_converter to
    # use - confirm), and every Formatter converts timestamps through it.
    cls.tz = Tz.timezone()
    logging.Formatter.converter = cls.time_converter

    handlers: List[logging.Handler] = [logging.StreamHandler()]
    if Env.get_bool_environment('OUTPUT_LOG_FILE_ENABLED', default=True):
        os.makedirs('logs', exist_ok=True)
        handlers.append(
            RotatingFileHandler(
                filename=f'logs/{log_name}.log',
                maxBytes=5 * 1024 * 1024,
                backupCount=3,
                encoding='utf-8',
            )
        )

    # noinspection PyArgumentList
    logging.basicConfig(
        handlers=handlers,
        format=cls.format,
        level=level,
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Keep the Google client libraries from logging below WARNING.
    for library_logger in ('googleapiclient', 'google_auth_httplib2'):
        logging.getLogger(library_logger).setLevel(logging.WARNING)
def make_credentials():
    """Build a ``Credentials`` object from the Google OAuth environment settings.

    Reads ``GOOGLE_CLIENT_ID``, ``GOOGLE_CLIENT_SECRET`` and
    ``GOOGLE_REFRESH_TOKEN`` (in that order) and combines them with the
    module-level ``DUMMY_ACCESS_TOKEN`` and ``SCOPES``.

    Returns:
        The constructed ``Credentials`` instance.
    """
    client_id, client_secret, refresh_token = (
        Env.get_environment(setting)
        for setting in (
            'GOOGLE_CLIENT_ID',
            'GOOGLE_CLIENT_SECRET',
            'GOOGLE_REFRESH_TOKEN',
        )
    )
    token_uri = "https://oauth2.googleapis.com/token"
    # Arguments stay positional to match the original call exactly.
    # NOTE(review): the third slot (None) is presumably the id_token -
    # confirm against the Credentials signature in use.
    return Credentials(
        DUMMY_ACCESS_TOKEN,
        refresh_token,
        None,
        token_uri,
        client_id,
        client_secret,
        SCOPES,
    )
def __init__(self) -> None:
    """Load the database connection settings and log them with the password hidden.

    Reads ``DATABASE_URL`` (required) and ``DATABASE_SSLMODE`` (default
    ``'require'``) and stores the timezone returned by ``Tz.timezone()``.
    The debug log line shows the URL with any password replaced by ``***``
    so credentials are not written to log output.
    """
    # Local import: the file's import block is not visible from this block.
    from urllib.parse import urlsplit

    self._db_url: str = Env.get_environment('DATABASE_URL', required=True)
    self._sslmode: str = Env.get_environment('DATABASE_SSLMODE',
                                             default='require',
                                             required=False)
    self._tz = Tz.timezone()

    # A database URL commonly has the form scheme://user:password@host/db.
    # Only the password part of the netloc is replaced; everything else is
    # kept so the line remains useful for debugging.
    try:
        parts = urlsplit(self._db_url)
        userinfo, at_sign, host_part = parts.netloc.rpartition('@')
        username, colon, _password = userinfo.partition(':')
        if at_sign and colon:
            parts = parts._replace(netloc=f'{username}:***@{host_part}')
        loggable_url = parts.geturl()
    except ValueError:
        # urlsplit rejects some malformed URLs; never fall back to the raw value.
        loggable_url = '<unparseable url>'

    logger.debug(
        f'Store setting info. _db_url={loggable_url}, _sslmode={self._sslmode}'
    )
def main(self):
    """Crawl every configured Twitter user in an endless loop.

    ``INTERVAL`` (minutes, default ``'5'``) sets the pause between passes and
    ``TWITTER_USER_IDS`` is a comma-separated list of user ids. An error
    raised during a pass is printed with its traceback and aborts the rest of
    that pass; the loop then sleeps and starts the next pass.
    """
    interval_minutes = int(Env.get_environment('INTERVAL', default='5'))
    user_ids = Env.get_environment('TWITTER_USER_IDS')
    user_list = [TwitterUser(user_id) for user_id in user_ids.split(',')]
    while True:
        try:
            for user in user_list:
                self.crawling_rt(user)
        except Exception:
            # Catch Exception rather than using a bare ``except:`` so that
            # KeyboardInterrupt and SystemExit still stop the process.
            traceback.print_exc()
        time.sleep(interval_minutes * 60)
def main() -> None:
    """Run the interactive Google OAuth console flow and print the refresh token.

    Reads ``GOOGLE_CLIENT_ID`` and ``GOOGLE_CLIENT_SECRET`` (both required),
    builds an "installed application" client configuration, runs the console
    authorization flow and prints the resulting refresh token to stdout.
    """
    client_id: str = Env.get_environment('GOOGLE_CLIENT_ID', required=True)
    client_secret: str = Env.get_environment('GOOGLE_CLIENT_SECRET',
                                             required=True)
    client_config = {
        'installed': {
            'auth_uri': 'https://accounts.google.com/o/oauth2/auth',
            'token_uri': 'https://accounts.google.com/o/oauth2/token',
            'redirect_uris': ['urn:ietf:wg:oauth:2.0:oob'],
            'client_id': client_id,
            'client_secret': client_secret
        }
    }
    flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
    # NOTE(review): run_console() and the "oob" redirect URI depend on the
    # out-of-band flow, which newer google-auth-oauthlib releases reportedly
    # no longer provide - confirm against the pinned version.
    credentials: Credentials = flow.run_console()
    # Use the public property rather than reading the private
    # ``_refresh_token`` attribute through vars().
    print(f'refresh_token: {credentials.refresh_token}')
def get_access_token():
    """Run the interactive Google OAuth console flow and print the refresh token.

    Reads ``GOOGLE_CLIENT_ID`` and ``GOOGLE_CLIENT_SECRET``, builds an
    "installed application" client configuration, runs the console
    authorization flow and prints the resulting refresh token to stdout.
    Nothing is returned.
    """
    client_id = Env.get_environment('GOOGLE_CLIENT_ID')
    client_secret = Env.get_environment('GOOGLE_CLIENT_SECRET')
    client_config = {
        "installed": {
            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
            "token_uri": "https://accounts.google.com/o/oauth2/token",
            "redirect_uris": ["urn:ietf:wg:oauth:2.0:oob"],
            "client_id": client_id,
            "client_secret": client_secret
        }
    }
    flow = InstalledAppFlow.from_client_config(client_config, SCOPES)
    # NOTE(review): run_console() and the "oob" redirect URI depend on the
    # out-of-band flow, which newer google-auth-oauthlib releases reportedly
    # no longer provide - confirm against the pinned version.
    credentials = flow.run_console()
    # Use the public property rather than reading the private
    # ``_refresh_token`` attribute through vars().
    print(f'refresh_token: {credentials.refresh_token}')
def __init__(self) -> None:
    """Read the crawl settings and create an authenticated tweepy API client.

    Settings read: ``TWEET_PAGES`` (default ``'25'``), ``TWEET_COUNT``
    (default ``'200'``), ``MODE_SPECIFIED`` (default ``'rt'``) and the four
    required ``TWITTER_*`` OAuth values. The client is created with
    ``retry_count=3``, ``retry_delay=5``, ``retry_errors={500, 503}`` and
    with rate-limit waiting and notification switched on.
    """
    read_setting = Env.get_environment

    # Paging settings arrive as strings and are stored as ints.
    self.tweet_page: int = int(read_setting('TWEET_PAGES', default='25'))
    self.tweet_count: int = int(read_setting('TWEET_COUNT', default='200'))
    self.mode: str = read_setting('MODE_SPECIFIED', default='rt')
    self._last_fav_result: Dict[str, TweetMedia] = {}

    # All four OAuth values are read before any tweepy object is built.
    consumer_key: str = read_setting('TWITTER_CONSUMER_KEY', required=True)
    consumer_secret: str = read_setting('TWITTER_CONSUMER_SECRET',
                                        required=True)
    access_token: str = read_setting('TWITTER_ACCESS_TOKEN', required=True)
    access_token_secret: str = read_setting('TWITTER_ACCESS_TOKEN_SECRET',
                                            required=True)

    oauth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)

    api_options = {
        'retry_count': 3,
        'retry_delay': 5,
        'retry_errors': {500, 503},
        'wait_on_rate_limit': True,
        'wait_on_rate_limit_notify': True,
    }
    self.api: tweepy.API = tweepy.API(oauth, **api_options)

    logger.debug(
        f'Twitter setting info. tweet_page={self.tweet_page}, tweet_count={self.tweet_count}, '
        f'mode={self.mode}')
def __init__(self) -> None:
    """Create the collaborating clients and make sure the download directory exists.

    ``SAVE_MODE`` (default ``'local'``) selects the save mode. A
    ``GooglePhotos`` client is created only when the mode is ``'google'``, so
    ``self.google_photos`` is not set in any other mode.
    """
    save_mode: str = Env.get_environment('SAVE_MODE', default='local')
    self._save_mode: str = save_mode

    # Construction order is kept: Twitter, then Store, then GooglePhotos.
    self.twitter: Twitter = Twitter()
    self.store: Store = Store()
    uses_google_photos = save_mode == 'google'
    if uses_google_photos:
        self.google_photos: GooglePhotos = GooglePhotos()

    download_dir = './download'
    self._download_dir: str = download_dir
    os.makedirs(download_dir, exist_ok=True)
def main(self) -> None:
    """Crawl tweets for every configured user in an endless loop.

    ``INTERVAL`` (minutes, default ``'5'``) sets the pause between passes and
    ``TWITTER_USER_IDS`` (required) is a comma-separated list of user ids.
    An ``Exception`` raised during a pass is logged with its traceback and
    ends that pass early; the loop then sleeps and begins the next pass.
    """
    interval_minutes: int = int(Env.get_environment('INTERVAL', default='5'))
    user_ids: str = Env.get_environment('TWITTER_USER_IDS', required=True)

    user_list: List[TwitterUser] = []
    for user_id in user_ids.split(','):
        user_list.append(TwitterUser(id=user_id))

    pause_seconds = interval_minutes * 60
    while True:
        try:
            for user in user_list:
                logger.info(
                    f'Crawling start. user = {user.id}, mode={self.twitter.mode}'
                )
                self.crawling_tweets(user)
        except Exception as e:
            logger.exception(f'Crawling error exception={e.args}')

        logger.info(f'Interval. sleep {interval_minutes} minutes.')
        time.sleep(pause_seconds)
def timezone(zone: str = '') -> Any:
    """Resolve a pendulum timezone, falling back to UTC.

    Args:
        zone: Timezone name. An empty string means "use the ``TZ`` setting".

    Returns:
        ``pendulum.timezone(name)`` for the resolved name, or ``pendulum.UTC``
        when the resolved name is empty or pendulum raises
        ``InvalidTimezone`` for it.
    """
    # An explicit argument wins; otherwise defer to the TZ setting.
    tz_name: str = zone if zone != '' else Env.get_environment('TZ')

    if tz_name == '':
        return pendulum.UTC

    # noinspection PyUnresolvedReferences
    try:
        return pendulum.timezone(tz_name)
    except pendulum.tz.zoneinfo.exceptions.InvalidTimezone:
        return pendulum.UTC
def __init__(self):
    """Read the crawl settings and create an authenticated tweepy API client.

    Settings read: ``TWEET_PAGES`` (default ``'5'``), ``TWEET_COUNT``
    (default ``'100'``) and the four ``TWITTER_*`` OAuth values. The client
    is created with ``wait_on_rate_limit=True``.
    """
    read_setting = Env.get_environment

    # Paging settings arrive as strings and are stored as ints.
    self.tweet_page = int(read_setting('TWEET_PAGES', default='5'))
    self.tweet_count = int(read_setting('TWEET_COUNT', default='100'))

    # All four OAuth values are read, in the original order, before any
    # tweepy object is built.
    consumer_key, consumer_secret, access_token, access_token_secret = (
        read_setting(setting)
        for setting in (
            'TWITTER_CONSUMER_KEY',
            'TWITTER_CONSUMER_SECRET',
            'TWITTER_ACCESS_TOKEN',
            'TWITTER_ACCESS_TOKEN_SECRET',
        )
    )

    oauth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)
    self.api = tweepy.API(oauth, wait_on_rate_limit=True)