def _save_to_s3(self, data):
    assert len(data) > 0, "Need data to save!"

    # TODO: store AWS credentials in a better way.
    assert 'AWS_ACCESS_KEY_ID' in config, "Need AWS key!"
    assert 'AWS_SECRET_ACCESS_KEY' in config, "Need AWS secret!"
    assert 'MESSAGE_STORE_BUCKET_NAME' in config, \
        "Need bucket name to store message data!"

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'))

    # See if it already exists and has the same hash
    data_obj = bucket.get_key(self.data_sha256)
    if data_obj:
        assert data_obj.get_metadata('data_sha256') == self.data_sha256, \
            "Block hash doesn't match what we previously stored on s3!"
        # log.info("Block already exists on S3.")
        return

    data_obj = Key(bucket)
    # if metadata:
    #     assert type(metadata) is dict
    #     for k, v in metadata.iteritems():
    #         data_obj.set_metadata(k, v)
    data_obj.set_metadata('data_sha256', self.data_sha256)
    # data_obj.content_type = self.content_type  # Experimental
    data_obj.key = self.data_sha256
    # log.info("Writing data to S3 with hash {0}".format(self.data_sha256))
    # def progress(done, total):
    #     log.info("%.2f%% done" % (done / total * 100))
    # data_obj.set_contents_from_string(data, cb=progress)
    data_obj.set_contents_from_string(data)
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    '''Log checkedout and overflow when a connection is checked out'''
    hostname = gethostname().replace(".", "-")
    process_name = str(config.get("PROCESS_NAME", "main_process"))

    if config.get('ENABLE_DB_TXN_METRICS', False):
        statsd_client.gauge(".".join(
            ["dbconn", database_name, hostname, process_name,
             "checkedout"]),
            connection_proxy._pool.checkedout())

        statsd_client.gauge(".".join(
            ["dbconn", database_name, hostname, process_name,
             "overflow"]),
            connection_proxy._pool.overflow())

    # Keep track of where and why this connection was checked out.
    log = get_logger()
    context = log._context._dict.copy()
    f, name = find_first_app_frame_and_name(ignores=['sqlalchemy',
                                                     'inbox.ignition',
                                                     'nylas.logging'])
    source = '{}:{}'.format(name, f.f_lineno)

    pool_tracker[dbapi_connection] = {
        'source': source,
        'context': context,
        'checkedout_at': time.time()
    }
def send_invite(ical_txt, event, account, invite_type='request'):
    # We send these transactional emails through a separate domain.
    MAILGUN_API_KEY = config.get('NOTIFICATIONS_MAILGUN_API_KEY')
    MAILGUN_DOMAIN = config.get('NOTIFICATIONS_MAILGUN_DOMAIN')
    assert MAILGUN_DOMAIN is not None and MAILGUN_API_KEY is not None

    for participant in event.participants:
        email = participant.get('email', None)
        if email is None:
            continue

        if email == account.email_address:
            # If the organizer is among the participants, don't send
            # a second email. They already have the event on their
            # calendar.
            continue

        msg = generate_invite_message(ical_txt, event, account, invite_type)
        msg.headers['To'] = email
        final_message = msg.to_string()

        mg_url = 'https://api.mailgun.net/v3/{}/messages.mime'.format(
            MAILGUN_DOMAIN)
        r = requests.post(mg_url,
                          auth=("api", MAILGUN_API_KEY),
                          data={"to": email},
                          files={"message": final_message})

        if r.status_code != 200:
            log.error("Couldn't send invite email for", email_address=email,
                      event_id=event.id, account_id=account.id,
                      logstash_tag='invite_sending',
                      status_code=r.status_code)
def email_exception(logger, account_id, etype, evalue, tb):
    """Send stringified exception to configured email address."""
    exc_email_addr = config.get('EXCEPTION_EMAIL_ADDRESS')
    if exc_email_addr is None:
        logger.error('No EXCEPTION_EMAIL_ADDRESS configured!')
        return

    mailgun_api_endpoint = config.get('MAILGUN_API_ENDPOINT')
    if mailgun_api_endpoint is None:
        logger.error('No MAILGUN_API_ENDPOINT configured!')
        return

    mailgun_api_key = config.get('MAILGUN_API_KEY')
    if mailgun_api_key is None:
        logger.error('No MAILGUN_API_KEY configured!')
        return

    account_str = 'account_id {}: '.format(account_id) if account_id else ''
    r = requests.post(
        mailgun_api_endpoint,
        auth=('api', mailgun_api_key),
        data={'from': "Inbox App Server <{}>".format(exc_email_addr),
              'to': [exc_email_addr],
              'subject': "Uncaught error! {}{} {}".format(account_str, etype,
                                                          evalue),
              'text': u"""
Something went wrong on {}. Please investigate. :)

{}
""".format(socket.getfqdn(),
           safe_format_exception(etype, evalue, tb))})
    if r.status_code != requests.codes.ok:
        logger.error("Couldn't send exception email: {}".format(r.text))
def get_redis_client():
    global redis_client
    if redis_client is None:
        global redis_hostname
        if redis_hostname is None:
            redis_hostname = config.get('REDIS_HOSTNAME')
        if redis_hostname is None or not isinstance(redis_hostname, str):
            raise Exception('Error while reading REDIS_HOSTNAME')

        global redis_port
        if redis_port is None:
            redis_port = config.get('REDIS_PORT')
        if redis_port is None or not isinstance(redis_port, int):
            raise Exception('Error while reading REDIS_PORT')

        global redis_database
        if redis_database is None:
            redis_database = config.get('REDIS_DATABASE')
        if redis_database is None or not isinstance(redis_database, int) or \
                redis_database < 1 or redis_database > 15:
            raise Exception('Error while reading REDIS_DATABASE')

        redis_client = StrictRedis(host=redis_hostname,
                                   port=redis_port,
                                   db=redis_database)

    return redis_client
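
# Hypothetical usage sketch (not part of the original code): because the
# client is cached at module level, repeated calls share one connection
# pool. Assumes REDIS_HOSTNAME/REDIS_PORT/REDIS_DATABASE are set in config;
# the key name and TTL below are illustrative only.
def _example_redis_usage():
    client = get_redis_client()
    # Record a heartbeat that expires after 60 seconds.
    client.setex('example:heartbeat:1', 60, 'alive')
    return client.get('example:heartbeat:1')
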
def _save_to_s3(data_sha256, data):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'
    assert 'MESSAGE_STORE_BUCKET_NAME' in config, \
        'Need bucket name to store message data!'

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing('s3.save_latency', latency_millis)
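
# Minimal usage sketch, assuming the blockstore convention visible above:
# blocks are content-addressed, keyed by the SHA-256 hex digest of the raw
# bytes, so re-saving identical data is a no-op. Names are illustrative.
import hashlib

def _example_save(raw_bytes):
    data_sha256 = hashlib.sha256(raw_bytes).hexdigest()
    _save_to_s3(data_sha256, raw_bytes)
    return data_sha256
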
def __init__(self, process_identifier, cpu_id, poll_interval=10):
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.process_identifier = process_identifier

    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)

    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)
def __init__(self, process_identifier, process_number,
             poll_interval=SYNC_POLL_INTERVAL):
    self.host = platform.node()
    self.process_number = process_number
    self.process_identifier = process_identifier

    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(process_number=process_number)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)
    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)
    self.rolling_cpu_counts = collections.deque(maxlen=NUM_CPU_SAMPLES)
    self.last_unloaded_account = time.time()

    # Fill the queue with initial values.
    null_cpu_values = [0.0 for cpu in psutil.cpu_percent(percpu=True)]
    for i in range(NUM_CPU_SAMPLES):
        self.rolling_cpu_counts.append(null_cpu_values)
def configure_logging(is_prod):
    tty_handler = logging.StreamHandler(sys.stdout)
    if not is_prod:
        # Use a more human-friendly format.
        formatter = colorlog.ColoredFormatter(
            '%(log_color)s[%(levelname)s]%(reset)s %(message)s',
            reset=True, log_colors={'DEBUG': 'cyan', 'INFO': 'green',
                                    'WARNING': 'yellow', 'ERROR': 'red',
                                    'CRITICAL': 'red'})
    else:
        formatter = logging.Formatter('%(message)s')
    tty_handler.setFormatter(formatter)
    tty_handler._inbox = True

    # Configure the root logger.
    root_logger = logging.getLogger()
    for handler in root_logger.handlers:
        # If the handler was previously installed, remove it so that repeated
        # calls to configure_logging() are idempotent.
        if getattr(handler, '_inbox', False):
            root_logger.removeHandler(handler)
    root_logger.addHandler(tty_handler)

    # Set loglevel DEBUG if config value is missing.
    root_logger.setLevel(config.get('LOGLEVEL', 10))

    if config.get('SENTRY_EXCEPTIONS'):
        sentry_dsn = config.get_required('SENTRY_DSN')
        global sentry_client
        sentry_client = raven.Client(
            sentry_dsn,
            processors=('inbox.log.TruncatingProcessor',))
def __init__(self, process_identifier, cpu_id, poll_interval=10):
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.process_identifier = process_identifier

    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)

    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)

    # We call cpu_percent in a non-blocking way. Because of the way
    # this function works, it'll always return 0.0 the first time
    # we call it. See: https://pythonhosted.org/psutil/#psutil.cpu_percent
    # for more details.
    psutil.cpu_percent(percpu=True)
def session_scope(id_, versioned=True, explicit_begin=False):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    versioned : bool
        Do you want to enable the transaction log?
    explicit_begin : bool
        If True, issue an explicit BEGIN statement instead of relying on
        implicit transactional semantics.

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(engine),
                              session_id=id(session), call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except (gevent.GreenletExit, gevent.Timeout) as exc:
        log.info('Invalidating connection on gevent exception',
                 exc_info=True)
        session.invalidate()
    except BaseException as exc:
        try:
            session.rollback()
            raise
        except OperationalError:
            log.warn('Encountered OperationalError on rollback',
                     original_exception=type(exc))
            raise exc
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=engine.pool.checkedout())
        session.close()
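
# Hypothetical caller sketch (assumes session_scope is wrapped as a context
# manager, as its yield implies): commit-on-success and rollback-on-error
# come for free. The Account model usage mirrors other snippets here; the
# id values are illustrative.
def _example_disable_sync(namespace_id, account_id):
    with session_scope(namespace_id) as db_session:
        account = db_session.query(Account).get(account_id)
        account.sync_should_run = False
        # No explicit commit needed; it happens when the block exits cleanly.
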
def session_scope(versioned=True, ignore_soft_deletes=True,
                  namespace_id=None):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    versioned : bool
        Do you want to enable the transaction log?
    ignore_soft_deletes : bool
        Whether or not to ignore soft-deleted objects in query results.
    namespace_id : int
        Namespace to limit query results with.

    Yields
    ------
    InboxSession
        The created session.

    """
    global cached_engine
    if cached_engine is None:
        cached_engine = main_engine()
        log.info("Don't yet have engine... creating default from ignition",
                 engine=id(cached_engine))

    session = InboxSession(cached_engine,
                           versioned=versioned,
                           ignore_soft_deletes=ignore_soft_deletes,
                           namespace_id=namespace_id)
    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(cached_engine),
                              session_id=id(session), call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=cached_engine.pool.checkedout())
        yield session
        session.commit()
    except:
        session.rollback()
        raise
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=cached_engine.pool.checkedout())
        session.close()
def _get_from_s3(self):
    assert self.data_sha256, "Can't get data with no hash!"

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'))

    data_obj = bucket.get_key(self.data_sha256)
    assert data_obj, "No data returned!"

    return data_obj.get_contents_as_string()
def _get_from_s3(self):
    assert self.data_sha256, "Can't get data with no hash!"

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    data_obj = bucket.get_key(self.data_sha256)
    assert data_obj, "No data returned!"

    return data_obj.get_contents_as_string()
def session_scope(id_, versioned=True):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    versioned : bool
        Do you want to enable the transaction log?

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(engine),
                              session_id=id(session), call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except BaseException as exc:
        try:
            session.rollback()
            raise
        except OperationalError:
            log.warn('Encountered OperationalError on rollback',
                     original_exception=type(exc))
            raise exc
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=engine.pool.checkedout())
        session.close()
def session_scope(versioned=True, ignore_soft_deletes=False):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    versioned : bool
        Do you want to enable the transaction log?

    Yields
    ------
    Session
        The created session.

    """
    global cached_engine
    if cached_engine is None:
        cached_engine = main_engine()
        log.info("Don't yet have engine... creating default from ignition",
                 engine=id(cached_engine))

    session = new_session(cached_engine, versioned)

    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(cached_engine),
                              session_id=id(session), call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=cached_engine.pool.checkedout())
        yield session
        session.commit()
    except:
        session.rollback()
        raise
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=cached_engine.pool.checkedout())
        session.close()
def run(self):
    if config.get('DEBUG_CONSOLE_ON'):
        # Enable the debugging console if this flag is set. Connect to
        # localhost on the port shown in the logs to get access to a REPL.
        port = None
        start_port = config.get('DEBUG_START_PORT')
        if start_port:
            port = start_port + self.cpu_id

        gevent.spawn(break_to_interpreter, port=port)

    setproctitle('inbox-sync-{}'.format(self.cpu_id))
    retry_with_logging(self._run_impl, self.log)
def _is_in_s3(data_sha256):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'
    assert 'MESSAGE_STORE_BUCKET_NAME' in config, \
        'Need bucket name to store message data!'

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    return bool(bucket.get_key(data_sha256))
def _get_from_s3_bucket(data_sha256, bucket_name):
    if not data_sha256:
        return None

    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"),
                        config.get("AWS_SECRET_ACCESS_KEY"))
    bucket = conn.get_bucket(bucket_name, validate=False)

    key = bucket.get_key(data_sha256)
    if not key:
        log.error("No key with name: {} returned!".format(data_sha256))
        return

    return key.get_contents_as_string()
def _get_redis_client():
    global redis_hostname
    if redis_hostname is None:
        redis_hostname = config.get('REDIS_HOSTNAME')

    global redis_port
    if redis_port is None:
        redis_port = config.get('REDIS_PORT')

    global redis_client
    if redis_client is None:
        redis_client = StrictRedis(host=redis_hostname, port=redis_port)

    return redis_client
def __init__(self, process_identifier, process_number,
             poll_interval=SYNC_POLL_INTERVAL):
    self.host = platform.node()
    self.process_number = process_number
    self.process_identifier = process_identifier

    self.monitor_cls_for = {
        mod.PROVIDER: getattr(mod, mod.SYNC_MONITOR_CLS)
        for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')
    }

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(process_number=process_number)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    # Randomize the poll_interval so we maintain at least a little fairness
    # when using a timeout while blocking on the redis queues.
    min_poll_interval = 5
    self.poll_interval = int((random.random() *
                              (poll_interval - min_poll_interval)) +
                             min_poll_interval)
    self.semaphore = BoundedSemaphore(1)
    self.zone = config.get('ZONE')

    # Note that we don't partition by zone for the private queues.
    # There's not really a reason to since there's one queue per machine
    # anyways. Also, if you really want to send an Account to a mailsync
    # machine in another zone you can do so.
    self.private_queue = EventQueue(
        SYNC_EVENT_QUEUE_NAME.format(self.process_identifier))
    self.queue_group = EventQueueGroup([
        shared_sync_event_queue_for_zone(self.zone),
        self.private_queue,
    ])

    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self._pending_avgs_provider = None
    self.last_unloaded_account = time.time()
def get_heartbeat_config():
    global g_alive_threshold
    if g_alive_threshold is None:
        g_alive_threshold = config.get('ALIVE_THRESHOLD')
    if g_alive_threshold is None or not isinstance(g_alive_threshold, int):
        raise Exception('Error while reading ALIVE_THRESHOLD')

    global g_alive_threshold_eas
    if g_alive_threshold_eas is None:
        g_alive_threshold_eas = config.get('ALIVE_THRESHOLD_EAS')
    if g_alive_threshold_eas is None or \
            not isinstance(g_alive_threshold_eas, int):
        raise Exception('Error while reading ALIVE_THRESHOLD_EAS')

    return (g_alive_threshold, g_alive_threshold_eas)
def _get_from_s3_bucket(data_sha256, bucket_name):
    if not data_sha256:
        return None

    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(bucket_name, validate=False)

    key = bucket.get_key(data_sha256)
    if not key:
        log.error('No key with name: {} returned!'.format(data_sha256))
        return

    return key.get_contents_as_string()
def __init__(
    self,
    blocking_sample_period=BLOCKING_SAMPLE_PERIOD,
    sampling_interval=GREENLET_SAMPLING_INTERVAL,
    logging_interval=LOGGING_INTERVAL,
):
    self.blocking_sample_period = blocking_sample_period
    self.sampling_interval = sampling_interval
    self.logging_interval = logging_interval

    self.time_spent_by_context = collections.defaultdict(float)
    self.total_switches = 0
    self._last_switch_time = None
    self._switch_flag = False
    self._active_greenlet = None
    self._main_thread_id = gevent._threading.get_ident()
    self._hub = gevent.hub.get_hub()
    self.last_logged_stats = time.time()
    self.last_checked_blocking = time.time()

    self.total_cpu_time = 0
    self.process = psutil.Process()
    self.pending_avgs = {1: 0, 5: 0, 15: 0}
    self.cpu_avgs = {1: 0, 5: 0, 15: 0}
    self.hostname = socket.gethostname().replace(".", "-")
    self.process_name = str(config.get("PROCESS_NAME", "unknown"))
    # We need a new client instance here because this runs in its own
    # thread.
    self.statsd_client = get_statsd_client()
    self.start_time = time.time()
def new_connection():
    """Get a new connection to the Elasticsearch hosts defined in config."""
    elasticsearch_hosts = config.get('ELASTICSEARCH_HOSTS')
    if not elasticsearch_hosts:
        raise SearchInterfaceError('No search hosts configured')
    return elasticsearch.Elasticsearch(hosts=elasticsearch_hosts)
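
# Hypothetical usage sketch, assuming ELASTICSEARCH_HOSTS is a list in the
# form the elasticsearch client accepts (e.g. [{'host': ..., 'port': ...}]).
def _example_search_ping():
    es = new_connection()
    return es.ping()  # True if any configured host responds
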
def f(session):
    if obj_state['sent_event']:
        return

    id = obj_state['id']
    sync_should_run = obj_state['sync_should_run']
    sync_host = obj_state['sync_host']
    desired_sync_host = obj_state['desired_sync_host']

    try:
        if sync_host is not None:
            # Somebody is actively syncing this Account, so notify them if
            # they should give up the Account.
            if not sync_should_run or (sync_host != desired_sync_host and
                                       desired_sync_host is not None):
                queue_name = SYNC_EVENT_QUEUE_NAME.format(sync_host)
                log.info("Sending 'migrate_from' event for Account",
                         account_id=id, queue_name=queue_name)
                EventQueue(queue_name).send_event({'event': 'migrate_from',
                                                   'id': id})
            return

        if not sync_should_run:
            # We don't need to notify anybody because the Account is not
            # actively being synced (sync_host is None) and sync_should_run
            # is False, so just return early.
            return

        if desired_sync_host is not None:
            # Nobody is actively syncing the Account, and we have somebody
            # who wants to sync this Account, so notify them.
            queue_name = SYNC_EVENT_QUEUE_NAME.format(desired_sync_host)
            log.info("Sending 'migrate_to' event for Account",
                     account_id=id, queue_name=queue_name)
            EventQueue(queue_name).send_event({'event': 'migrate_to',
                                               'id': id})
            return

        # Nobody is actively syncing the Account, and nobody in particular
        # wants to sync the Account, so notify the shared queue.
        shared_queue = shared_sync_event_queue_for_zone(config.get('ZONE'))
        log.info("Sending 'migrate' event for Account",
                 account_id=id, queue_name=shared_queue.queue_name)
        shared_queue.send_event({'event': 'migrate', 'id': id})
        obj_state['sent_event'] = True
    except:
        log_uncaught_errors(log, account_id=id, sync_host=sync_host,
                            desired_sync_host=desired_sync_host)
def configure_logging(is_prod=False):
    # The is_prod argument is ignored and only retained for compatibility.
    tty_handler = logging.StreamHandler(sys.stdout)
    if sys.stdout.isatty():
        # Use a more human-friendly format.
        formatter = colorlog.ColoredFormatter(
            '%(log_color)s[%(levelname)s]%(reset)s %(message)s',
            reset=True, log_colors={'DEBUG': 'cyan', 'INFO': 'green',
                                    'WARNING': 'yellow', 'ERROR': 'red',
                                    'CRITICAL': 'red'})
    else:
        formatter = logging.Formatter('%(message)s')
    tty_handler.setFormatter(formatter)
    tty_handler._inbox = True

    # Configure the root logger.
    root_logger = logging.getLogger()
    for handler in root_logger.handlers:
        # If the handler was previously installed, remove it so that repeated
        # calls to configure_logging() are idempotent.
        if getattr(handler, '_inbox', False):
            root_logger.removeHandler(handler)
    root_logger.addHandler(tty_handler)

    # Set loglevel DEBUG if config value is missing.
    root_logger.setLevel(config.get('LOGLEVEL', 10))
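
# Hypothetical startup sketch: configure_logging() is idempotent (it removes
# its own previously installed handlers), so it is safe to call once per
# process at startup or again after a config change.
if __name__ == '__main__':
    configure_logging()
    logging.getLogger(__name__).info('logging configured')
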
def __init__(self, databases, users, include_disabled=False):
    self.engines = {}
    keys = set()
    schema_names = set()
    use_proxysql = config.get('USE_PROXYSQL', False)
    for database in databases:
        hostname = '127.0.0.1' if use_proxysql else database['HOSTNAME']
        port = database['PORT']
        username = users[hostname]['USER']
        password = users[hostname]['PASSWORD']
        for shard in database['SHARDS']:
            schema_name = shard['SCHEMA_NAME']
            key = shard['ID']

            # Perform some sanity checks on the configuration.
            assert isinstance(key, int)
            assert key not in keys, \
                'Shard key collision: key {} is repeated'.format(key)
            assert schema_name not in schema_names, \
                'Shard name collision: {} is repeated'.format(schema_name)
            keys.add(key)
            schema_names.add(schema_name)

            if shard.get('DISABLED') and not include_disabled:
                log.info('Not creating engine for disabled shard',
                         schema_name=schema_name, hostname=hostname,
                         key=key)
                continue

            uri = build_uri(username=username, password=password,
                            database_name=schema_name,
                            hostname=hostname, port=port)
            self.engines[key] = engine(schema_name, uri)
def start(port, start_syncback, enable_tracer, enable_profiler):
    # We need to import this down here, because this in turn imports
    # ignition.engine, which has to happen *after* we read any config
    # overrides for the database parameters. Boo for imports with
    # side-effects.
    from inbox.api.srv import app

    if start_syncback:
        # start actions service
        from inbox.transactions.actions import SyncbackService

        if enable_profiler:
            inbox_config["DEBUG_PROFILING_ON"] = True
        enable_profiler_api = inbox_config.get("DEBUG_PROFILING_ON")

        syncback = SyncbackService(0, 0, 1)
        profiling_frontend = SyncbackHTTPFrontend(
            int(port) + 1, enable_tracer, enable_profiler_api)
        profiling_frontend.start()
        syncback.start()

    nylas_logger = get_logger()

    http_server = WSGIServer(("", int(port)), app, log=nylas_logger,
                             handler_class=NylasWSGIHandler)
    nylas_logger.info("Starting API server", port=port)
    http_server.serve_forever()

    if start_syncback:
        syncback.join()
def _save_to_s3(data_sha256, data):
    # type: (str, bytes) -> None
    assert (
        "TEMP_MESSAGE_STORE_BUCKET_NAME" in config
    ), "Need temp bucket name to store message data!"

    _save_to_s3_bucket(
        data_sha256, config.get("TEMP_MESSAGE_STORE_BUCKET_NAME"), data
    )
def __init__(self, databases, users, include_disabled=False):
    self.engines = {}
    keys = set()
    schema_names = set()
    use_proxysql = config.get("USE_PROXYSQL", False)
    for database in databases:
        hostname = "127.0.0.1" if use_proxysql else database["HOSTNAME"]
        port = database["PORT"]
        username = users[hostname]["USER"]
        password = users[hostname]["PASSWORD"]
        for shard in database["SHARDS"]:
            schema_name = shard["SCHEMA_NAME"]
            key = shard["ID"]

            # Perform some sanity checks on the configuration.
            assert isinstance(key, int)
            assert key not in keys, \
                "Shard key collision: key {} is repeated".format(key)
            assert schema_name not in schema_names, \
                "Shard name collision: {} is repeated".format(schema_name)
            keys.add(key)
            schema_names.add(schema_name)

            if shard.get("DISABLED") and not include_disabled:
                log.info(
                    "Not creating engine for disabled shard",
                    schema_name=schema_name,
                    hostname=hostname,
                    key=key,
                )
                continue

            uri = build_uri(
                username=username,
                password=password,
                database_name=schema_name,
                hostname=hostname,
                port=port,
            )
            self.engines[key] = engine(schema_name, uri)
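
# Hypothetical sketch of the config shape this constructor expects, inferred
# from the lookups above; the hostnames, shard ids, and credentials are
# illustrative placeholders, not real values.
example_databases = [{
    "HOSTNAME": "db1.example.internal",
    "PORT": 3306,
    "SHARDS": [
        {"ID": 0, "SCHEMA_NAME": "shard_0"},
        {"ID": 1, "SCHEMA_NAME": "shard_1", "DISABLED": True},
    ],
}]
example_users = {"db1.example.internal": {"USER": "inbox",
                                          "PASSWORD": "secret"}}
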
def main():
    from inbox.config import config, secrets_path

    maybe_enable_rollbar()

    # If the config contains encryption keys, don't override.
    if config.get("SECRET_ENCRYPTION_KEY"):
        raise Exception("Encryption keys already present in secrets config "
                        "file {0}".format(secrets_path))

    # Generate the keys.
    data = {
        "SECRET_ENCRYPTION_KEY": binascii.hexlify(
            nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)),
        "BLOCK_ENCRYPTION_KEY": binascii.hexlify(
            nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)),
    }

    # Our secrets config file contains our database credentials etc.,
    # so it had better exist. Update it.
    try:
        with open(secrets_path, "a") as f:
            print(
                "Writing keys to secrets config file {0}".format(secrets_path))
            yaml.dump(data, f, default_flow_style=False)
    except IOError:
        raise Exception(
            "Check file write permissions on config file {0}".format(
                secrets_path))

    # Update the config dict.
    config.update(data)
def __init__(self, cpu_id, total_cpus, poll_interval=10):
    self.keep_running = True
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.total_cpus = total_cpus

    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval

    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.sync_hosts_for_shards = {}
    for database in config['DATABASE_HOSTS']:
        for shard in database['SHARDS']:
            # If no sync hosts are explicitly configured for the shard,
            # then try to steal from it. That way if you turn up a new
            # shard without properly allocating sync hosts to it, accounts
            # on it will still be started.
            self.sync_hosts_for_shards[shard['ID']] = shard.get(
                'SYNC_HOSTS') or [self.host]
def configure_logging(is_prod):
    tty_handler = logging.StreamHandler(sys.stdout)
    if not is_prod:
        # Use a more human-friendly format.
        formatter = colorlog.ColoredFormatter(
            '%(log_color)s[%(levelname)s]%(reset)s %(message)s',
            reset=True,
            log_colors={
                'DEBUG': 'cyan',
                'INFO': 'green',
                'WARNING': 'yellow',
                'ERROR': 'red',
                'CRITICAL': 'red'
            })
    else:
        formatter = logging.Formatter('%(message)s')
    tty_handler.setFormatter(formatter)
    tty_handler._inbox = True

    # Configure the root logger.
    root_logger = logging.getLogger()
    for handler in root_logger.handlers:
        # If the handler was previously installed, remove it so that repeated
        # calls to configure_logging() are idempotent.
        if getattr(handler, '_inbox', False):
            root_logger.removeHandler(handler)
    root_logger.addHandler(tty_handler)

    # Set loglevel DEBUG if config value is missing.
    root_logger.setLevel(config.get('LOGLEVEL', 10))
def accounts_to_start(self):
    accounts = []
    for key in engine_manager.engines:
        with session_scope_by_shard_id(key) as db_session:
            start_on_this_cpu = self.account_cpu_filter(self.cpu_id,
                                                        self.total_cpus)
            if config.get('SYNC_STEAL_ACCOUNTS', True):
                q = db_session.query(Account).filter(
                    Account.sync_host.is_(None),
                    Account.sync_should_run,
                    start_on_this_cpu)
                unscheduled_accounts_exist = db_session.query(
                    q.exists()).scalar()
                if unscheduled_accounts_exist:
                    # Atomically claim unscheduled syncs by setting
                    # sync_host.
                    q.update({'sync_host': self.host},
                             synchronize_session=False)
                    db_session.commit()

            accounts.extend([
                id_ for id_, in db_session.query(Account.id).filter(
                    Account.sync_should_run,
                    Account.sync_host == self.host,
                    start_on_this_cpu)])
    return accounts
def _get_from_s3(data_sha256):
    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"
    assert (
        "TEMP_MESSAGE_STORE_BUCKET_NAME" in config
    ), "Need temp bucket name to store message data!"

    # Try getting the data from our temporary blockstore before trying to
    # get it from the provider.
    data = _get_from_s3_bucket(
        data_sha256, config.get("TEMP_MESSAGE_STORE_BUCKET_NAME")
    )
    if data is not None:
        log.info(
            "Found hash in temporary blockstore!",
            sha256=data_sha256,
            logstash_tag="s3_direct",
        )
        return data

    log.info(
        "Couldn't find data in blockstore",
        sha256=data_sha256,
        logstash_tag="s3_direct",
    )
    return None
def suspend_sync():
    g.parser.add_argument('account_id', required=True, type=valid_public_id,
                          location='form')
    args = strict_parse_args(g.parser, request.args)
    namespace_public_id = args['account_id']

    with global_session_scope() as db_session:
        namespace = db_session.query(Namespace) \
            .filter(Namespace.public_id == namespace_public_id).one()
        namespace_id = namespace.id

    with session_scope(namespace_id) as db_session:
        namespace = db_session.query(Namespace) \
            .filter(Namespace.public_id == namespace_public_id).one()
        account = namespace.account
        account.sync_should_run = False
        account._sync_status['sync_disabled_reason'] = \
            'suspend_account API endpoint called'
        account._sync_status['sync_disabled_on'] = datetime.utcnow()
        account._sync_status['sync_disabled_by'] = 'api'
        db_session.commit()

        shared_queue = shared_sync_event_queue_for_zone(config.get('ZONE'))
        shared_queue.send_event({'event': 'sync_suspended',
                                 'id': account.id})

    return make_response(('', 204, {}))
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    '''Log checkedout and overflow when a connection is checked out'''
    hostname = gethostname().replace(".", "-")
    process_name = str(config.get("PROCESS_NAME", "unknown"))

    statsd_client.gauge(
        ".".join(["dbconn", database_name, hostname, process_name,
                  "checkedout"]),
        connection_proxy._pool.checkedout())

    statsd_client.gauge(
        ".".join(["dbconn", database_name, hostname, process_name,
                  "overflow"]),
        connection_proxy._pool.overflow())

    # Keep track of where and why this connection was checked out.
    log = get_logger()
    context = log._context._dict.copy()
    f, name = find_first_app_frame_and_name(
        ignores=['sqlalchemy', 'inbox.ignition', 'nylas.logging'])
    source = '{}:{}'.format(name, f.f_lineno)

    pool_tracker[dbapi_connection] = {
        'source': source,
        'context': context,
        'checkedout_at': time.time()
    }
def _get_from_s3(data_sha256):
    if not data_sha256:
        return None

    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    key = bucket.get_key(data_sha256)
    if not key:
        log.error('No key with name: {} returned!'.format(data_sha256))
        return

    return key.get_contents_as_string()
def __init__(self, max_blocking_time=MAX_BLOCKING_TIME,
             sampling_interval=GREENLET_SAMPLING_INTERVAL,
             logging_interval=LOGGING_INTERVAL):
    self.max_blocking_time = max_blocking_time
    self.sampling_interval = sampling_interval
    self.logging_interval = logging_interval

    self.time_spent_by_context = collections.defaultdict(float)
    self.total_switches = 0
    self._last_switch_time = None
    self._switch_flag = False
    self._active_greenlet = None
    self._main_thread_id = gevent._threading.get_ident()
    self._hub = gevent.hub.get_hub()
    self.last_logged_stats = time.time()
    self.last_checked_blocking = time.time()

    self.total_cpu_time = 0
    self.process = psutil.Process()
    self.pending_avgs = {1: 0, 5: 0, 15: 0}
    self.cpu_avgs = {1: 0, 5: 0, 15: 0}
    self.hostname = socket.gethostname().replace(".", "-")
    self.process_name = str(config.get("PROCESS_NAME", "unknown"))
    # We need a new client instance here because this runs in its own
    # thread.
    self.statsd_client = get_statsd_client()
def __init__(self, syncback_id, process_number, total_processes,
             poll_interval=1, retry_interval=30):
    self.process_number = process_number
    self.total_processes = total_processes
    self.poll_interval = poll_interval
    self.retry_interval = retry_interval
    self.keep_running = True
    self.workers = gevent.pool.Group()
    # Dictionary account_id -> semaphore to serialize action syncback for
    # any particular account.
    # TODO(emfree): We really only need to serialize actions that operate
    # on any given object. But IMAP actions are already effectively
    # serialized by using an IMAP connection pool of size 1, so it doesn't
    # matter too much.
    self.account_semaphores = defaultdict(lambda: BoundedSemaphore(1))
    # This SyncbackService performs syncback for only and all the accounts
    # on shards it is responsible for; shards are divided up between
    # running SyncbackServices.
    self.log = logger.new(component='syncback')
    syncback_assignments = config.get("SYNCBACK_ASSIGNMENTS", {})
    if syncback_id in syncback_assignments:
        self.keys = [key for key in engine_manager.engines
                     if key in syncback_assignments[syncback_id] and
                     key % total_processes == process_number]
    else:
        self.log.warn("No shards assigned to syncback server",
                      syncback_id=syncback_id)
        self.keys = []

    self.log = logger.new(component='syncback')
    gevent.Greenlet.__init__(self)
def new_gmail_account():
    logger = get_logger()
    authcode = request.values.get('authcode')
    redirecturi = request.values.get('redirecturi')
    reauth = True

    auth_handler = GmailAuthHandler(provider_name='gmail')
    auth_handler.OAUTH_REDIRECT_URI = redirecturi
    auth_info = auth_handler._get_authenticated_user(authcode)
    auth_info['contacts'] = True
    auth_info['events'] = True
    auth_info['provider'] = 'gmail'
    email_address = auth_info['email']
    account_exists = False

    # Check that the email is in the allowed emails list.
    emails_filter_enabled = config.get('EMAILS_FILTER_ENABLED')
    allowed_emails = config.get('ALLOWED_EMAILS')
    if emails_filter_enabled and allowed_emails and \
            email_address not in allowed_emails:
        return jsonify({"code": "email_not_allowed",
                        "message": "Email not allowed",
                        "profile": auth_info})

    with session_scope(0) as db_session:
        account = db_session.query(Account).filter_by(
            email_address=email_address).first()
        if account is not None and not reauth:
            api_id = account.namespace.public_id
            return jsonify({"code": "account_exist",
                            "message": "Account already exists",
                            "api_id": api_id})
        elif account is not None and reauth:
            account_exists = True
            account = auth_handler.update_account(account, auth_info)
        else:
            account = auth_handler.create_account(email_address, auth_info)

        try:
            if auth_handler.verify_account(account):
                db_session.add(account)
                db_session.commit()
        except NotSupportedError as e:
            return default_json_error(e)

        api_id = account.namespace.public_id

    if account_exists:
        return jsonify({"code": "account_updated",
                        "message": "Account already exists and was updated",
                        "api_id": api_id})
    return jsonify({"code": "account_created",
                    "message": "New account created",
                    "api_id": api_id})
def __init__(
    self,
    syncback_id,
    process_number,
    total_processes,
    poll_interval=1,
    retry_interval=120,
    num_workers=NUM_PARALLEL_ACCOUNTS,
    batch_size=20,
    fetch_batch_size=100,
):
    self.process_number = process_number
    self.total_processes = total_processes
    self.poll_interval = poll_interval
    self.retry_interval = retry_interval
    # Number of log entries to fetch before merging/de-duplication to
    # determine which records need to be processed.
    self.fetch_batch_size = fetch_batch_size
    # Number of log entries to process in a batch.
    self.batch_size = batch_size
    self.keep_running = True
    self.workers = gevent.pool.Group()
    # Dictionary account_id -> semaphore to serialize action syncback for
    # any particular account.
    # TODO(emfree): We really only need to serialize actions that operate
    # on any given object. But IMAP actions are already effectively
    # serialized by using an IMAP connection pool of size 1, so it doesn't
    # matter too much.
    self.account_semaphores = defaultdict(lambda: BoundedSemaphore(1))
    # This SyncbackService performs syncback for only and all the accounts
    # on shards it is responsible for; shards are divided up between
    # running SyncbackServices.
    self.log = logger.new(component="syncback")
    syncback_assignments = {
        int(k): v for k, v in config.get("SYNCBACK_ASSIGNMENTS", {}).items()
    }
    if syncback_id in syncback_assignments:
        self.keys = [
            key
            for key in engine_manager.engines
            if key in syncback_assignments[syncback_id]
            and key % total_processes == process_number
        ]
    else:
        self.log.warn(
            "No shards assigned to syncback server", syncback_id=syncback_id
        )
        self.keys = []

    self.log = logger.new(component="syncback")
    self.num_workers = num_workers
    self.num_idle_workers = 0
    self.worker_did_finish = gevent.event.Event()
    self.worker_did_finish.clear()
    self.task_queue = Queue()
    self.running_action_ids = set()
    gevent.Greenlet.__init__(self)
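
# Hypothetical sketch of a SYNCBACK_ASSIGNMENTS value; in JSON-backed config
# the mapping keys arrive as strings, which is why the constructor above
# normalizes them with int(k). The ids and shard keys are illustrative.
example_syncback_assignments = {
    "0": [0, 1],  # syncback server 0 handles shard keys 0 and 1
    "1": [2, 3],
}
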
def _get_from_s3(self):
    if not self.data_sha256:
        return None

    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    key = bucket.get_key(self.data_sha256)
    if not key:
        log.error('No key with name: {} returned!'.format(self.data_sha256))
        return

    return key.get_contents_as_string()
def delete_from_blockstore(*data_sha256_hashes):
    log.info("deleting from blockstore", sha256=data_sha256_hashes)

    if STORE_MSG_ON_S3:
        _delete_from_s3_bucket(
            data_sha256_hashes, config.get("TEMP_MESSAGE_STORE_BUCKET_NAME")
        )
    else:
        for data_sha256 in data_sha256_hashes:
            _delete_from_disk(data_sha256)
def run(self):
    if config.get('DEBUG_PROFILING_ON'):
        # If config flag is set, get live top-level profiling output on
        # stdout by doing kill -SIGTRAP <sync_process>.
        # This slows things down so you probably don't want to do it
        # normally.
        attach_profiler()

    setproctitle('inbox-sync-{}'.format(self.cpu_id))
    retry_with_logging(self._run_impl, self.log)
def _delete_from_s3_bucket(data_sha256_hashes, bucket_name):
    data_sha256_hashes = filter(None, data_sha256_hashes)
    if not data_sha256_hashes:
        return None

    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"),
                        config.get("AWS_SECRET_ACCESS_KEY"))
    bucket = conn.get_bucket(bucket_name, validate=False)
    bucket.delete_keys([key for key in data_sha256_hashes], quiet=True)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.delete_latency", latency_millis)
def create_auth_account(db_session, email_address, token, exit):
    uri = config.get('GOOGLE_OAUTH_REDIRECT_URI', None)
    if uri != 'urn:ietf:wg:oauth:2.0:oob':
        raise NotImplementedError('Callback-based OAuth is not supported')

    response = auth_account(email_address, token, exit)
    account = create_account(db_session, email_address, response)

    return account
def create_auth_account(db_session, email_address, token, exit):
    uri = config.get('GOOGLE_OAUTH_REDIRECT_URI', None)
    if uri != 'urn:ietf:wg:oauth:2.0:oob':
        raise NotImplementedError('Callback-based OAuth is not supported')

    response = _auth_account(email_address, token, exit)
    account = create_account(db_session, response)

    return account
def _save_to_s3(self, data):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'
    assert 'MESSAGE_STORE_BUCKET_NAME' in config, \
        'Need bucket name to store message data!'

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(self.data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = self.data_sha256
    key.set_contents_from_string(data)
def run(self):
    if config.get('DEBUG_PROFILING_ON'):
        # If config flag is set, get live top-level profiling output on
        # stdout by doing kill -SIGTRAP <sync_process>.
        # This slows things down so you probably don't want to do it
        # normally.
        attach_profiler()

    if config.get('DEBUG_CONSOLE_ON'):
        # Enable the debugging console if this flag is set. Connect to
        # localhost on the port shown in the logs to get access to a REPL.
        port = None
        start_port = config.get('DEBUG_START_PORT')
        if start_port:
            port = start_port + self.cpu_id

        gevent.spawn(break_to_interpreter, port=port)

    setproctitle('inbox-sync-{}'.format(self.cpu_id))
    retry_with_logging(self._run_impl, self.log)