from utility.NbConfig import NbConfig
from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('prune')

if __name__ == "__main__":
    db_client = client_factory.get_db_client()
    db_client.ensure_config_table_exists()
    config = NbConfig({})
    logger.info("Config to write: %s", config.config)
    db_client.write_config(config.config)
    config_read = db_client.read_config()
    logger.info("Config read from DB: %s", config_read.config)
import warnings

import MySQLdb
import rollbar
from datadog import statsd
from ddtrace import patch_all
from ddtrace import tracer

from connectors.DbConnector import DbConnector
from utility import nb_logging
from utility.NbConfig import NbConfig

logger = nb_logging.setup_logger('MySqlClient')

patch_all()

STATSD_PREFIX = 'nb.MySqlClient.'


class MySqlClient(DbConnector):

    def __init__(self, host, user, password, db_name):
        DbConnector.__init__(self)
        self.host = host
        self.user = user
        self.password = password
        self.db_name = db_name
        logger.info("Attempt to connect to DB %s on %s as user %s", db_name, host, user)
        self.conn = MySQLdb.connect(host=self.host, user=self.user,
                                    passwd=self.password, db=self.db_name)
import json
from time import sleep

import requests
import requests.exceptions
import rollbar
from bs4 import BeautifulSoup
from datadog import statsd

from utility import nb_logging

logger = nb_logging.setup_logger('NewsblurConnector')


class NewsblurConnector:

    def __init__(self, config, username, password):
        self.cookies = None
        self.config = config
        self.verify = config.get('VERIFY')
        self.nb_endpoint = config.get('NB_ENDPOINT')
        self.credentials = {'username': username, 'password': password}

    @statsd.timed('nb.NewsblurConnector.login')
    def login(self):
        """ log in and save cookies """
        r = requests.post(self.nb_endpoint + '/api/login', self.credentials)
        logger.debug('NewsBlur login response code: %s', r.status_code)
        statsd.increment('nb.http_requests.post')
        self.cookies = r.cookies
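    # The tasks below call get_comment_count() on this class, whose body is not
    # shown here. A minimal sketch of what such a method might look like,
    # assuming HN comment pages are scraped with the BeautifulSoup import above;
    # the 'commtext' CSS class and error handling are illustrative assumptions,
    # not the project's actual implementation:
    @statsd.timed('nb.NewsblurConnector.get_comment_count')
    def get_comment_count(self, url):
        try:
            r = requests.get(url, cookies=self.cookies, verify=self.verify)
            statsd.increment('nb.http_requests.get')
        except requests.exceptions.RequestException:
            rollbar.report_exc_info()
            return None
        # Count comment bodies on the fetched page.
        soup = BeautifulSoup(r.text, 'html.parser')
        return len(soup.find_all(class_='commtext'))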
import time

from datadog import statsd

from connectors.NewsblurConnector import NewsblurConnector
from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('populate')

config = None


@statsd.timed('nb.populate.populate')
def populate():
    logger.info('Set up DB and add a row for each HN story')
    db_client = client_factory.get_db_client()
    db_client.ensure_stories_table_exists()
    config = db_client.read_config()
    db_client.close_connection()
    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
    hashlist = nb_client.get_nb_hash_list()
    logger.info('Size of hashlist is %s', len(hashlist))
    batch_size = int(config.get('BATCH_SIZE'))
    logger.debug('Batch size is %s', batch_size)
    i = 0
from datadog import statsd

from models.NbUrl import NbUrl
from utility import nb_logging, client_factory

logger = nb_logging.setup_logger('add_domains')


@statsd.timed('nb.add_domains.add_domains')
def add_domains():
    db_client = client_factory.get_db_client()
    db_client.ensure_domains_table_exists()
    rows = db_client.list_urls()
    for row in rows:
        nb_hash = row[0]
        nb_url = NbUrl(row[1])
        domain, toplevel, toplevel_new = nb_url.get_domain_info()
        db_client.insert_domain_entry(nb_hash, nb_url.url, domain, toplevel, toplevel_new)


if __name__ == "__main__":
    add_domains()
from datadog import statsd

from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('update_comment_counts')


# Update comment counts for stories that might have had comments added:
#   - last updated is older than the threshold
#   - comments are still open on the story
@statsd.timed('nb.populate.update_comment_counts')
def update_comment_counts():
    logger.info('Update comment counts for stories in DB')
    db_client = client_factory.get_db_client()
    rows = db_client.list_comment_count_update_candidates()
    logger.debug('Found %s candidates for updating comment count', len(rows))
    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
    for row in rows:
        url = row[0]
        count = nb_client.get_comment_count(url)
        logger.debug("Count for %s is %s", url, count)
        if count is not None:
            db_client.add_comment_count(url, count)
            statsd.increment('nb.add_comment_counts.comment_counts_added')
    logger.info('Finished updating comment counts')
import warnings
import sqlite3

import rollbar
from datadog import statsd

from connectors.DbConnector import DbConnector
from utility import nb_logging
from utility.NbConfig import NbConfig

logger = nb_logging.setup_logger('SqliteClient')

STATSD_PREFIX = 'nb.SqliteClient.'


class SqliteClient(DbConnector):

    def __init__(self):
        DbConnector.__init__(self)
        db_file = 'nb.sqlite'
        # logger.info("Attempt to connect to DB file %s", db_file)
        self.conn = sqlite3.connect(db_file)

    def ensure_domains_table_exists(self):
        create_table_query = '''CREATE TABLE IF NOT EXISTS domains
                                (id INTEGER PRIMARY KEY ASC,
                                 nb_hash TEXT UNIQUE,
                                 domain TEXT,
                                 toplevel TEXT,
                                 toplevel_new TEXT,
                                 FOREIGN KEY (nb_hash) REFERENCES stories (hash))'''
        self.execute_wrapper(create_table_query)
        self.conn.commit()
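    # execute_wrapper() is inherited from DbConnector, which is not shown here.
    # A minimal sketch of what such a wrapper might do if defined directly on
    # this class, assuming it centralizes cursor handling, metrics, and error
    # reporting; the metric name and rollbar call are assumptions, not the
    # base class's actual code:
    def execute_wrapper(self, query, params=()):
        cursor = self.conn.cursor()
        try:
            cursor.execute(query, params)
            statsd.increment(STATSD_PREFIX + 'queries')
        except Exception:
            # Report the failed query, then let the caller decide what to do.
            rollbar.report_exc_info()
            raise
        return cursor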
import time

import rollbar
import schedule

from tasks.add_comment_counts import add_comment_counts
from tasks.add_domains import add_domains
from tasks.populate import populate
from tasks.populate import update_hash_list
from tasks.prune import prune_starred
from utility import client_factory
from utility import nb_logging

rollbar.init('00b402fc0da54ed1af8687d4c4389911')
logger = nb_logging.setup_logger('app')

from datadog import initialize
initialize(statsd_host='dd_agent')


def get_config(task):
    db_client = client_factory.get_db_client()
    config = db_client.read_config()
    # logger.debug('Config for %s: %s', task, config)
    return config


def periodic_update_hash_list():
    logger.info('Running scheduled update hash list task')
    update_hash_list()
    logger.info('Finished scheduled update hash list task')
import os

from connectors.MySqlClient import MySqlClient
from connectors.NewsblurConnector import NewsblurConnector
from utility import nb_logging

logger = nb_logging.setup_logger('client_factory')

SECRETS_DIR = os.getenv('SECRETS_DIR', '/run/secrets/')


def get_secret(name):
    with open(os.path.join(SECRETS_DIR, name), 'r') as f:
        secret = f.read().strip()
    return secret


def get_db_client():
    host = get_secret('DB_HOST')
    user = get_secret('DB_USER')
    password = get_secret('DB_PASS')
    db_name = get_secret('DB_NAME')
    logger.debug('host: %s', host)
    return MySqlClient(host=host, user=user, password=password, db_name=db_name)


def get_newsblur_client():
    db_client = get_db_client()
    username = get_secret('NB_USERNAME')
    password = get_secret('NB_PASSWORD')
    return NewsblurConnector(db_client.read_config(), username, password)
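# Usage sketch (assumptions: secrets are mounted one file per name under
# SECRETS_DIR, e.g. by Docker secrets; the file names match the get_secret
# calls above, and the values are placeholders):
#
#   /run/secrets/DB_HOST, /run/secrets/DB_USER, /run/secrets/DB_PASS,
#   /run/secrets/DB_NAME, /run/secrets/NB_USERNAME, /run/secrets/NB_PASSWORD
#
if __name__ == "__main__":
    # Smoke test: build both clients and log in to NewsBlur.
    nb_client = get_newsblur_client()
    nb_client.login()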
from datadog import statsd

from connectors.NewsblurConnector import NewsblurConnector
from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('add_comment_counts')


# Read through the DB for rows without a comment count, then add it.
@statsd.timed('nb.populate.add_comment_counts')
def add_comment_counts():
    logger.info('Add comment counts to stories in DB')
    db_client = client_factory.get_db_client()
    rows = db_client.list_stories_without_comment_count()
    logger.debug('Found %s rows', len(rows))
    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
    for row in rows:
        # url = row.hnurl
        url = row[0]
        count = nb_client.get_comment_count(url)
        logger.debug("Count for %s is %s", url, count)
        if count is not None:
            db_client.add_comment_count(url, count)
            statsd.increment('nb.add_comment_counts.comment_counts_added')
    logger.info('Finished adding comment counts')
import json
from time import sleep

import requests
import requests.exceptions
import rollbar
from bs4 import BeautifulSoup
from datadog import statsd
from ddtrace import patch
from ddtrace import tracer

from utility import nb_logging

patch(requests=True)

logger = nb_logging.setup_logger('NewsblurConnector')


class NewsblurConnector:

    def __init__(self, config, username, password):
        self.cookies = None
        self.config = config
        self.verify = config.get('VERIFY')
        self.nb_endpoint = config.get('NB_ENDPOINT')
        self.credentials = {'username': username, 'password': password}

    @statsd.timed('nb.NewsblurConnector.login')
    def login(self):
        """ log in and save cookies """
        r = requests.post(self.nb_endpoint + '/api/login', self.credentials)
        logger.debug('NewsBlur login response code: %s', r.status_code)
        statsd.increment('nb.http_requests.post')
        self.cookies = r.cookies
from datadog import statsd

from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('populate')


@statsd.timed('nb.populate.update_hash_list')
def update_hash_list():
    logger.info('Get full list of NB story hashes')
    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
    hash_list = nb_client.get_nb_hash_list()
    logger.info('Size of hashlist retrieved from Newsblur is %s', len(hash_list))
    db_client = client_factory.get_db_client()
    db_client.add_hashes(hash_list)


@statsd.timed('nb.populate.populate')
def populate():
    logger.info('Add a row for each HN story')
    db_client = client_factory.get_db_client()
    db_client.ensure_stories_table_exists()
    config = db_client.read_config()
    db_client.close_connection()
    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
import time

from datadog import statsd
from ddtrace import patch_all

from connectors.DbConnector import DbConnector
from connectors.dynamo.DomainModel import DomainModel
from connectors.dynamo.ErrorModel import ErrorModel
from connectors.dynamo.StoryModel import StoryModel
from utility import nb_logging

patch_all()

logger = nb_logging.setup_logger('DynamoDbClient')


class DynamoDbClient(DbConnector):

    def add_comment_count(self, comments_url, count):
        stories = StoryModel.query(comments_url)
        for dummy in stories:  # only want the first element...
            story = dummy
        story.comments = count
        story.save()
        statsd.increment('nb.comment_counts_added')

    def add_story(self, nb_hash, added, comments_url, story_url):
        story = StoryModel(comments_url, nb_hash=nb_hash, added=added, url=story_url)
        try:
            story.save()
        except Exception as err:
            logger.error("Caught exception while saving Story model, wait 2 sec and retry")
            time.sleep(2)
            story.save()
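# The StoryModel imported above lives in connectors/dynamo and is not shown
# here. A minimal sketch of what it might look like as a PynamoDB model,
# assuming comments_url is the hash key (it is the positional argument to both
# query() and the constructor above); the table name and attribute types are
# illustrative assumptions, so the sketch is left commented out to avoid
# shadowing the real import:
#
# from pynamodb.models import Model
# from pynamodb.attributes import NumberAttribute, UnicodeAttribute
#
# class StoryModel(Model):
#     class Meta:
#         table_name = 'stories'
#     comments_url = UnicodeAttribute(hash_key=True)
#     nb_hash = UnicodeAttribute()
#     added = UnicodeAttribute(null=True)
#     url = UnicodeAttribute(null=True)
#     comments = NumberAttribute(null=True)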
import time

import rollbar
from ddtrace import patch_all
from ddtrace import tracer

from tasks.add_comment_counts import add_comment_counts
from tasks.add_domains import add_domains
from tasks.populate import populate
from tasks.prune import prune_starred
from utility import client_factory
from utility import nb_logging

patch_all()

rollbar.init('00b402fc0da54ed1af8687d4c4389911')
logger = nb_logging.setup_logger('app')

from datadog import initialize
initialize(statsd_host='dd_agent')


def get_config(task):
    db_client = client_factory.get_db_client()
    config = db_client.read_config()
    logger.debug('Config for %s: %s', task, config)
    return config


def periodic_populate():
    config = get_config('populate')
    if 'True' == config.get('SHOULD_POPULATE'):
        logger.info('Running scheduled populate task')
        populate()
        logger.info('Finished scheduled populate task')