Example #1
def distributed_send_message(message):
    # If I am the master, attempt sending my messages through my slaves.
    if coordinator.am_i_master():
        try:
            if coordinator.slave_count and coordinator.slaves:
                for i, address in enumerate(coordinator.slaves):
                    if i >= coordinator.slave_count:
                        logger.error('Failed using all configured slaves; resorting to local send_message')
                        break
                    if rpc.send_message_to_slave(message, address):
                        return True
        except StopIteration:
            logger.warning('No more slaves. Sending locally.')

    logger.info('Sending message (ID %s) locally', message.get('message_id', '?'))

    runtime = send_message(message)
    add_mode_stat(message['mode'], runtime)

    metrics_key = 'app_%(application)s_mode_%(mode)s_cnt' % message
    metrics.add_new_metrics({metrics_key: 0})
    metrics.incr(metrics_key)

    if runtime is not None:
        return True

    raise Exception('Failed sending message')
Example #2
File: rpc.py Project: shoxv/iris
def handle_api_notification_request(socket, address, req):
    notification = req['data']
    notification['subject'] = '[%(application)s] %(subject)s' % notification
    role = notification.get('role')
    if not role:
        reject_api_request(socket, address, 'INVALID role')
        logger.warning('Dropping OOB message with invalid role "%s" from app %s', role, notification.get('application'))
        return
    target = notification.get('target')
    if not target:
        reject_api_request(socket, address, 'INVALID target')
        logger.warning('Dropping OOB message with invalid target "%s" from app %s', target, notification.get('application'))
        return

    expanded_targets = cache.targets_for_role(role, target)
    if not expanded_targets:
        reject_api_request(socket, address, 'INVALID role:target')
        logger.warning('Dropping OOB message with invalid role:target "%s:%s" from app %s', role, target, notification.get('application'))
        return

    # If we're rendering this using templates+context instead of body, fill in the
    # needed iris key.
    if notification.get('template') and notification.get('context'):
        notification['context']['iris'] = notification['context'].get('iris', {})

    logger.info('-> %s OK, to %s:%s (%s:%s)',
                address, role, target, notification.get('application', '?'), notification.get('priority', notification.get('mode', '?')))

    for _target in expanded_targets:
        temp_notification = notification.copy()
        temp_notification['target'] = _target
        send_queue.put(temp_notification)
    metrics.incr('notification_cnt')
    socket.sendall(msgpack.packb('OK'))
Example #3
File: sender.py Project: shoxv/iris
def distributed_send_message(message):
    if rpc.num_slaves and rpc.sender_slaves:
        for i, address in enumerate(rpc.sender_slaves):
            if i >= rpc.num_slaves:
                logger.error(
                    'Failed using all configured slaves; resorting to local send_message'
                )
                break
            if rpc.send_message_to_slave(message, address):
                return True

    logger.info('Sending message (ID %s) locally',
                message.get('message_id', '?'))

    runtime = send_message(message)
    add_mode_stat(message['mode'], runtime)

    metrics_key = 'app_%(application)s_mode_%(mode)s_cnt' % message
    metrics.add_new_metrics({metrics_key: 0})
    metrics.incr(metrics_key)

    if runtime is not None:
        return True

    raise Exception('Failed sending message')
Example #4
    def allow_send(self, message):
        application = message.get('application')

        if not application:
            return True

        # Purpose of quotas is to protect downstreams. If we're already going to drop this message,
        # don't let it account against quota.
        if message.get('mode') == 'drop':
            return True

        rate = self.rates.get(application)

        if not rate:
            return True

        hard_buckets, soft_buckets, hard_limit, soft_limit, wait_time, plan_name, target = rate

        # Increment both buckets for this minute
        hard_buckets[-1] += 1
        soft_buckets[-1] += 1

        # If hard limit breached, disallow sending this message and create incident
        hard_quota_usage = sum(hard_buckets)

        hard_usage_pct = 0
        if hard_limit > 0:
            hard_usage_pct = (hard_quota_usage / hard_limit) * 100  # true division; // floored partial usage to 0
        metrics.set('app_%s_quota_hard_usage_pct' % application,
                    hard_usage_pct)

        if hard_quota_usage > hard_limit:
            metrics.incr('quota_hard_exceed_cnt')
            if plan_name:
                with self.last_incidents_mutex:
                    self.notify_incident(application, hard_limit,
                                         len(hard_buckets), plan_name,
                                         wait_time)
            return False

        # If soft limit breached, just notify owner and still send
        soft_quota_usage = sum(soft_buckets)

        soft_usage_pct = 0
        if soft_limit > 0:
            soft_usage_pct = (soft_quota_usage / soft_limit) * 100  # true division; // floored partial usage to 0
        metrics.set('app_%s_quota_soft_usage_pct' % application,
                    soft_usage_pct)

        if soft_quota_usage > soft_limit:
            metrics.incr('quota_soft_exceed_cnt')
            if target:
                with self.last_soft_quota_notification_time_mutex:
                    self.notify_target(application, soft_limit,
                                       len(soft_buckets), *target)
            return True

        return True
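
Note: the rate tuple unpacked above implies per-application sliding windows in which hard_buckets[-1] is the current minute's counter. A minimal sketch of how such buckets might be maintained, assuming a fixed per-minute window (the window length and rotation cadence are assumptions, not taken from this code):

from collections import deque

# Hypothetical sliding window: one counter per minute over the last hour.
WINDOW_MINUTES = 60

def new_buckets():
    # sum(buckets) is the total usage across the window
    return deque([0] * WINDOW_MINUTES, maxlen=WINDOW_MINUTES)

def rotate(buckets):
    # called at each minute boundary; appending evicts the oldest counter
    buckets.append(0)

buckets = new_buckets()
buckets[-1] += 1          # count a send against the current minute
rotate(buckets)           # a minute boundary passes
assert sum(buckets) == 1  # usage over the whole window is preserved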
Example #5
def main():
    boot_time = time.time()
    config = load_config()

    metrics.init(config, 'iris-owa-sync', default_metrics)

    owaconfig = config.get('owa')

    if not owaconfig:
        logger.critical('Missing OWA configs')
        sys.exit(1)

    api_host = owaconfig.get('api_host', 'http://localhost:16649')
    iris_client = IrisClient(api_host, 0, owaconfig['iris_app'],
                             owaconfig['iris_app_key'])

    proxies = owaconfig.get('proxies')

    # only way to configure a proxy is to monkey-patch (http adapter) a monkey-patch (baseprotocol) :/
    if proxies:
        UseProxyHttpAdapter._my_proxies = proxies
        exchangelib.protocol.BaseProtocol.HTTP_ADAPTER_CLS = UseProxyHttpAdapter

    creds = exchangelib.Credentials(**owaconfig['credentials'])

    try:
        nap_time = int(owaconfig.get('sleep_interval', 60))
    except ValueError:
        nap_time = 60

    while True:
        start_time = time.time()
        message_count = 0

        try:
            config = exchangelib.Configuration(credentials=creds,
                                               **owaconfig['config'])
            account = exchangelib.Account(config=config,
                                          access_type=exchangelib.DELEGATE,
                                          **owaconfig['account'])
        except (exchangelib.errors.EWSError,
                requests.exceptions.RequestException):
            logger.exception('Failed authenticating to OWA365')
            metrics.incr('owa_api_failure_count')
        else:
            logger.info('Receiving mail on behalf of %s',
                        owaconfig['account'].get('primary_smtp_address'))
            message_count = poll(account, iris_client)

        now = time.time()
        run_time = now - start_time
        logger.info(
            'Last run took %.2f seconds and processed %s messages. Waiting %s seconds until next poll...',
            run_time, message_count, nap_time)
        metrics.set('uptime', now - boot_time)
        metrics.emit()
        sleep(nap_time)
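
Note: the keys read above imply an 'owa' config block shaped roughly like the following; every value is an illustrative placeholder, not taken from the project:

# Hypothetical owa config, inferred from the keys main() accesses.
owaconfig = {
    'api_host': 'http://localhost:16649',
    'iris_app': 'iris-owa-sync',      # placeholder app name
    'iris_app_key': 'REDACTED',
    'proxies': {'https': 'http://proxy.example.com:8080'},    # optional
    'sleep_interval': 60,
    'credentials': {'username': 'svc@example.com', 'password': 'REDACTED'},
    'config': {'server': 'outlook.office365.com'},            # exchangelib.Configuration kwargs
    'account': {'primary_smtp_address': 'iris@example.com'},  # exchangelib.Account kwargs
}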
Example #6
def ldap_sync_loop(ldap_lists, engine, interval):
    sleep(60)
    while True:
        logger.info('Starting ldap sync...')
        try:
            run_start = time.time()
            sync_ldap_lists(ldap_lists, engine)
            logger.info('Ldap mailing list sync took %.2f seconds', time.time() - run_start)
        except Exception:
            metrics.incr('failed_tasks')
            logger.exception('Error syncing from ldap!')
        sleep(interval)
Example #7
def oncall_sync_loop(config, engine, interval):

    while True:
        logger.info('Starting oncall sync...')
        try:
            run_start = time.time()
            sync_from_oncall(config, engine)
            logger.info('oncall sync took %.2f seconds', time.time() - run_start)
        except Exception:
            metrics.incr('failed_tasks')
            logger.exception('Error syncing from oncall!')
        sleep(interval)
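
Note: both loops above run forever, so the caller presumably launches them as concurrent green threads. A wiring sketch, assuming gevent (whose spawn also appears in Example #19) and illustrative intervals:

from gevent import spawn, joinall

# Hypothetical wiring; ldap_lists, config, and engine come from the caller.
tasks = [
    spawn(ldap_sync_loop, ldap_lists, engine, 3600),
    spawn(oncall_sync_loop, config, engine, 60),
]
joinall(tasks)  # block forever; each loop logs and swallows its own errors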
Example #8
def handle_slave_send(socket, address, req):
    message = req['data']
    message_id = message.get('message_id', '?')

    try:
        runtime = send_funcs['send_message'](message)
        add_mode_stat(message['mode'], runtime)

        metrics_key = 'app_%(application)s_mode_%(mode)s_cnt' % message
        metrics.add_new_metrics({metrics_key: 0})
        metrics.incr(metrics_key)

        if runtime is not None:
            response = 'OK'
            access_logger.info(
                'Message (ID %s) from master %s sent successfully', message_id,
                address)
            metrics.incr('slave_message_send_success_cnt')
        else:
            response = 'FAIL'
            access_logger.error(
                'Got falsy value from send_message for message (ID %s) from master %s: %s',
                message_id, address, runtime)
            metrics.incr('slave_message_send_fail_cnt')
    except Exception:
        response = 'FAIL'
        logger.exception('Sending message (ID %s) from master %s failed.', message_id, address)
        metrics.incr('slave_message_send_fail_cnt')

    socket.sendall(msgpack.packb(response))
Example #9
def send_message_to_slave(message, address):
    try:
        payload = generate_msgpack_message_payload(message)
    except TypeError:
        logger.exception('Failed encoding message %s as msgpack', message)
        metrics.incr('rpc_message_pass_fail_cnt')
        return False

    pretty_address = '%s:%s' % address
    message_id = message.get('message_id', '?')
    try:
        s = socket.create_connection(address)
        # sendall retries until the whole payload is written; send() may stop short
        s.sendall(payload)
        sender_resp = msgpack_unpack_msg_from_socket(s)
        s.close()
    except socket.error:
        logger.exception('Failed connecting to %s to send message (ID %s)',
                         pretty_address, message_id)
        metrics.incr('rpc_message_pass_fail_cnt')
        return False

    if sender_resp == 'OK':
        access_logger.info(
            'Successfully passed message (ID %s) to %s for sending',
            message_id, pretty_address)
        metrics.incr('rpc_message_pass_success_cnt')
        return True
    else:
        logger.error('Failed sending message (ID %s) through %s: %s',
                     message_id, pretty_address, sender_resp)
        metrics.incr('rpc_message_pass_fail_cnt')
        return False
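
Note: generate_msgpack_message_payload and msgpack_unpack_msg_from_socket are not shown in these examples. A minimal sketch of what they might look like, assuming the endpoint/data request shape visible in Example #17 and a streaming msgpack decoder (the actual iris wire format may differ):

import msgpack

def generate_msgpack_message_payload(message):
    # msgpack.packb raises TypeError on unserializable values, which matches
    # the except TypeError handling above. The endpoint name is an assumption.
    return msgpack.packb({'endpoint': 'slave_send', 'data': message})

def msgpack_unpack_msg_from_socket(sock):
    # Feed bytes into a streaming Unpacker until one complete object decodes;
    # no length prefix is needed, matching the bare packb() replies above.
    unpacker = msgpack.Unpacker(raw=False)
    while True:
        data = sock.recv(4096)
        if not data:
            return None
        unpacker.feed(data)
        for obj in unpacker:
            return obj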
Example #10
def prune_target(engine, target_name, target_type):
    if target_type == 'user':
        metrics.incr('users_purged')
    else:
        metrics.incr('others_purged')

    if target_type == 'team':
        try:
            # rename team to prevent namespace conflict but preserve the ability to reactivate it in the future
            new_name = str(uuid.uuid4())
            engine.execute('''UPDATE `target` SET `active` = FALSE, `name` = %s WHERE `name` = %s AND `type_id` = (SELECT `id` FROM `target_type` WHERE `name` = %s)''', (new_name, target_name, target_type))
            logger.info('Deactivated and renamed inactive oncall team %s', target_name)
        except SQLAlchemyError as e:
            logger.error('Deactivating oncall team %s failed: %s', target_name, e)
            metrics.incr('sql_errors')
        return

    try:
        engine.execute('''DELETE FROM `target` WHERE `name` = %s AND `type_id` = (SELECT `id` FROM `target_type` WHERE `name` = %s)''', (target_name, target_type))
        logger.info('Deleted inactive target %s', target_name)

    # The target has messages or other data which should be preserved.
    # Just mark it inactive. (IntegrityError must be caught before its parent
    # class SQLAlchemyError below.)
    except IntegrityError:
        logger.info('Marking target %s inactive', target_name)
        engine.execute('''UPDATE `target` SET `active` = FALSE WHERE `name` = %s AND `type_id` = (SELECT `id` FROM `target_type` WHERE `name` = %s)''', (target_name, target_type))

    except SQLAlchemyError as e:
        logger.error('Deleting target %s failed: %s', target_name, e)
        metrics.incr('sql_errors')
        return
Example #11
def stats_task():
    connection = db.engine.raw_connection()
    cursor = connection.cursor()
    cursor.execute('SELECT `id`, `name` FROM `application`')
    applications = [{'id': row[0], 'name': row[1]} for row in cursor]
    for app in applications:
        try:
            stats = app_stats.calculate_app_stats(app, connection, cursor)
            set_app_stats(app, stats, connection, cursor)
        except Exception:
            logger.exception('App stats calculation failed for app %s',
                             app['name'])
            metrics.incr('task_failure')
    cursor.close()
    connection.close()
Example #12
    def allow_send(self, message):
        application = message.get('application')

        if not application:
            return True

        rate = self.rates.get(application)

        if not rate:
            return True

        hard_buckets, soft_buckets, hard_limit, soft_limit, wait_time, plan_name, target = rate

        # Increment both buckets for this minute
        hard_buckets[-1] += 1
        soft_buckets[-1] += 1

        # If hard limit breached, disallow sending this message and create incident
        hard_quota_usage = sum(hard_buckets)

        hard_usage_pct = 0
        if hard_limit > 0:
            hard_usage_pct = (hard_quota_usage / hard_limit) * 100
        metrics.set('app_%s_quota_hard_usage_pct' % application,
                    hard_usage_pct)

        if hard_quota_usage > hard_limit:
            metrics.incr('quota_hard_exceed_cnt')
            self.notify_incident(application, hard_limit, len(hard_buckets),
                                 plan_name, wait_time)
            return False

        # If soft limit breached, just notify owner and still send
        soft_quota_usage = sum(soft_buckets)

        soft_usage_pct = 0
        if soft_limit > 0:
            soft_usage_pct = (soft_quota_usage / soft_limit) * 100
        metrics.set('app_%s_quota_soft_usage_pct' % application,
                    soft_usage_pct)

        if soft_quota_usage > soft_limit:
            metrics.incr('quota_soft_exceed_cnt')
            self.notify_target(application, soft_limit, len(soft_buckets),
                               *target)
            return True

        return True
Example #13
def prune_user(engine, username):
    metrics.incr('users_purged')

    try:
        engine.execute('''DELETE FROM `target` WHERE `name` = %s AND `type_id` = (SELECT `id` FROM `target_type` WHERE `name` = 'user')''', username)
        logger.info('Deleted inactive user %s', username)

    # The user has messages or some other user data which should be preserved.
    # Just mark as inactive.
    except IntegrityError:
        logger.info('Marking user %s inactive', username)
        engine.execute('''UPDATE `target` SET `active` = FALSE WHERE `name` = %s AND `type_id` = (SELECT `id` FROM `target_type` WHERE `name` = 'user')''', username)

    except SQLAlchemyError as e:
        logger.error('Deleting user %s failed: %s', username, e)
        metrics.incr('sql_errors')
Example #14
def handle_slave_send(socket, address, req):
    message = req['data']
    message_id = message.get('message_id', '?')

    message['to_slave'] = True

    try:
        runtime = send_funcs['message_send_enqueue'](message)
        response = 'OK'
        access_logger.info('Message (ID %s) from master %s queued successfully', message_id, address)
    except Exception:
        response = 'FAIL'
        logger.exception('Queueing message (ID %s) from master %s failed.', message_id, address)
        # runtime is never assigned when message_send_enqueue raises, so don't log it
        access_logger.error('Failed queueing message (ID %s) from master %s', message_id, address)
        metrics.incr('slave_message_send_fail_cnt')

    socket.sendall(msgpack.packb(response))
Example #15
def stats_task(connection, cursor):

    cursor.execute('SELECT `id`, `name` FROM `application`')
    applications = [{'id': row[0], 'name': row[1]} for row in cursor]
    # clean up old app stats
    cursor.execute('''DELETE FROM `application_stats`''')
    for app in applications:
        try:
            stats = app_stats.calculate_app_stats(app, connection, cursor)
            set_app_stats(app, stats, connection, cursor)
        except Exception:
            logger.exception('App stats calculation failed for app %s',
                             app['name'])
            metrics.incr('task_failure')
    try:
        stats = app_stats.calculate_global_stats(connection, cursor)
        set_global_stats(stats, connection, cursor)
    except Exception:
        logger.exception('Global stats calculation failed')
        metrics.incr('task_failure')
Example #16
File: owasync.py Project: marafa/iris
def relay(message, iris_client):
    # Get headers into the format iris expects from gmail
    headers = [{
        'name': header.name,
        'value': header.value
    } for header in message.headers]

    # If this is a bulk message or an auto reply or something else, don't bother sending it to iris-api
    if is_pointless_message(headers):
        logger.info('Not relaying pointless message %s (from %s) to iris-api',
                    message.message_id, message.sender.email_address)
        metrics.incr('message_ignore_count')
        return

    # To and From headers are strangely missing
    if message.to_recipients:
        headers.append({
            'name': 'To',
            'value': message.to_recipients[0].email_address
        })
    headers.append({'name': 'From', 'value': message.sender.email_address})

    data = {'headers': headers, 'body': message.text_body.strip()}

    try:
        iris_client.post('response/email', json=data).raise_for_status()
        metrics.incr('message_relay_success_count')
    except requests.exceptions.RequestException:
        metrics.incr('message_relay_failure_count')
        logger.exception('Failed posting message %s (from %s) to iris-api',
                         message.message_id, message.sender.email_address)
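
Note: is_pointless_message is referenced but not shown. A plausible sketch based on standard auto-reply and bulk-mail headers; the exact checks iris performs are an assumption:

def is_pointless_message(headers):
    # Treat auto-generated and bulk mail as not worth relaying.
    # Auto-Submitted (RFC 3834) and Precedence: bulk/junk are the usual signals.
    lookup = {h['name'].lower(): h['value'].strip().lower() for h in headers}
    if lookup.get('auto-submitted', 'no') != 'no':
        return True
    if lookup.get('precedence') in ('bulk', 'junk', 'auto_reply'):
        return True
    return False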
Example #17
def handle_api_request(socket, address):
    metrics.incr('api_request_cnt')
    timeout = Timeout.start_new(rpc_timeout)
    try:
        req = msgpack_unpack_msg_from_socket(socket)
        if not req:
            logger.warning('Couldn\'t get msgpack data from %s', address)
            socket.close()
            return
        access_logger.info('%s %s', address, req['endpoint'])
        handler = api_request_handlers.get(req['endpoint'])
        if handler is not None:
            handler(socket, address, req)
        else:
            logger.info('-> %s unknown request', address)
            socket.sendall(msgpack.packb('UNKNOWN'))
    except Timeout:
        metrics.incr('api_request_timeout_cnt')
        logger.warning('-> %s timeout', address)
        socket.sendall(msgpack.packb('TIMEOUT'))
    finally:
        timeout.cancel()
    socket.close()
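
Note: the handler above expects a msgpack map with endpoint and data keys and replies with a bare msgpack string. A minimal client sketch under those assumptions (host and port are placeholders):

import socket
import msgpack

def call_iris_rpc(address, endpoint, data):
    # Send one request and read one reply ('OK', 'FAIL', 'UNKNOWN', 'TIMEOUT', ...).
    s = socket.create_connection(address)
    try:
        s.sendall(msgpack.packb({'endpoint': endpoint, 'data': data}))
        return msgpack.unpackb(s.recv(4096), raw=False)
    finally:
        s.close()

# e.g. call_iris_rpc(('iris-sender.example.com', 2321), 'notification', {...})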
Example #18
File: owasync.py Project: p6knewman/iris
def poll(account, iris_client):

    try:
        metrics.set('total_inbox_count', account.inbox.total_count)
        metrics.set('unread_inbox_count', account.inbox.unread_count)
    except (exchangelib.errors.EWSError, requests.exceptions.RequestException):
        logger.exception('Failed to gather inbox counts from OWA API')
        metrics.incr('owa_api_failure_count')

    processed_messages = 0
    messages_to_mark_read = []

    try:
        for message in account.inbox.filter(
                is_read=False).order_by('-datetime_received'):
            processed_messages += 1

            try:
                relay(message, iris_client)
            except Exception:
                logger.exception('Uncaught exception during message relaying')
                metrics.incr('message_relay_failure_count')

            # Mark it as read in bulk later. (This syntax isn't documented)
            message.is_read = True
            messages_to_mark_read.append((message, ('is_read', )))

    except (exchangelib.errors.EWSError, requests.exceptions.RequestException):
        logger.exception('Failed to iterate through inbox')
        metrics.incr('owa_api_failure_count')

    if messages_to_mark_read:
        bulk_update_count = len(messages_to_mark_read)
        logger.info('will mark %s messages as read', bulk_update_count)
        try:
            account.bulk_update(items=messages_to_mark_read)
        except (exchangelib.errors.EWSError,
                requests.exceptions.RequestException):
            logger.exception(
                'Failed to update read status on %s messages in bulk',
                bulk_update_count)
            metrics.incr('owa_api_failure_count')

    metrics.set('message_process_count', processed_messages)
    return processed_messages
Example #19
File: sender.py Project: shoxv/iris
def main():

    global config
    config = load_config()

    is_master = config['sender'].get('is_master', False)
    logger.info('[-] bootstrapping sender (master: %s)...', is_master)
    init_sender(config)
    init_plugins(config.get('plugins', {}))
    init_vendors(config.get('vendors', []), config.get('applications', []))

    send_task = spawn(send)
    worker_tasks = [spawn(worker) for x in xrange(100)]
    if is_master:
        if should_mock_gwatch_renewer:
            spawn(mock_gwatch_renewer)
        else:
            spawn(gwatch_renewer)
        spawn(prune_old_audit_logs_worker)

    rpc.init(config['sender'], dict(send_message=send_message))
    rpc.run(config['sender'])

    interval = 60
    logger.info('[*] sender bootstrapped')
    while True:
        runtime = int(time.time())
        logger.info('--> sender loop started.')

        cache.refresh()
        cache.purge()

        if is_master:
            try:
                escalate()
                deactivate()
                poll()
                aggregate(runtime)
            except Exception:
                metrics.incr('task_failure')
                logger.exception("Exception occured in main loop.")

        # check status for all background greenlets and respawn if necessary
        if not bool(send_task):
            logger.error("send task failed, %s", send_task.exception)
            metrics.incr('task_failure')
            send_task = spawn(send)
        bad_workers = []
        for i, task in enumerate(worker_tasks):
            if not bool(task):
                logger.error("worker task failed, %s", task.exception)
                metrics.incr('task_failure')
                bad_workers.append(i)
        for i in bad_workers:
            worker_tasks[i] = spawn(worker)

        spawn(metrics.emit)

        now = time.time()
        elapsed_time = now - runtime
        nap_time = max(0, interval - elapsed_time)
        logger.info(
            '--> sender loop finished in %s seconds - sleeping %s seconds',
            elapsed_time, nap_time)
        sleep(nap_time)
Example #20
def sync_ldap_lists(ldap_settings, engine):
    try:
        l = ldap.ldapobject.ReconnectLDAPObject(
            ldap_settings['connection']['url'])
    except Exception:
        logger.exception('Connecting to ldap to get our mailing lists failed.')
        return

    try:
        l.simple_bind_s(*ldap_settings['connection']['bind_args'])
    except Exception:
        logger.exception('Binding to ldap to get our mailing lists failed.')
        return

    session = sessionmaker(bind=engine)()

    mailing_list_type_name = 'mailing-list'

    list_type_id = session.execute(
        'SELECT `id` FROM `target_type` WHERE `name` = :name', {
            'name': mailing_list_type_name
        }).scalar()
    if not list_type_id:
        try:
            list_type_id = session.execute(
                'INSERT INTO `target_type` (`name`) VALUES (:name)', {
                    'name': mailing_list_type_name
                }).lastrowid
            session.commit()
            logger.info('Created target_type "%s" with id %s',
                        mailing_list_type_name, list_type_id)
        except (IntegrityError, DataError):
            logger.exception('Failed creating mailing-list type ID')
            return

    ldap_add_pause_interval = ldap_settings.get('user_add_pause_interval',
                                                None)
    ldap_add_pause_duration = ldap_settings.get('user_add_pause_duration', 1)

    ldap_lists = get_ldap_lists(l, ldap_settings['search_strings'])
    ldap_lists_count = len(ldap_lists)
    metrics.set('ldap_lists_found', ldap_lists_count)
    metrics.set('ldap_memberships_found', 0)
    logger.info('Found %s ldap lists', ldap_lists_count)

    existing_ldap_lists = {
        row[0]
        for row in session.execute(
            '''SELECT `name` FROM `target` WHERE `target`.`type_id` = :type_id''',
            {'type_id': list_type_id})
    }
    kill_lists = existing_ldap_lists - {item[1] for item in ldap_lists}
    if kill_lists:
        metrics.incr('ldap_lists_removed', len(kill_lists))
        for ldap_list in kill_lists:
            prune_target(engine, ldap_list, mailing_list_type_name)

    user_add_count = 0

    for list_cn, list_name in ldap_lists:
        try:
            members = get_ldap_flat_membership(l,
                                               ldap_settings['search_strings'],
                                               list_cn,
                                               ldap_settings['max_depth'], 0,
                                               set())
        except ldap.SERVER_DOWN:
            # reconnect and retry once
            metrics.incr('ldap_reconnects')
            logger.warning('LDAP server went away for list %s. Reconnecting',
                           list_name)
            l.reconnect(ldap_settings['connection']['url'])
            members = get_ldap_flat_membership(l,
                                               ldap_settings['search_strings'],
                                               list_cn,
                                               ldap_settings['max_depth'], 0,
                                               set())

        if not members:
            logger.info('Ignoring/pruning empty ldap list %s', list_name)
            continue

        num_members = len(members)
        metrics.incr('ldap_memberships_found', num_members)

        created = False
        list_id = session.execute(
            '''SELECT `mailing_list`.`target_id`
                                     FROM `mailing_list`
                                     JOIN `target` on `target`.`id` = `mailing_list`.`target_id`
                                     WHERE `target`.`name` = :name''', {
                'name': list_name
            }).scalar()

        if not list_id:
            try:
                list_id = session.execute(
                    '''INSERT INTO `target` (`type_id`, `name`)
                                             VALUES (:type_id, :name)''', {
                        'type_id': list_type_id,
                        'name': list_name
                    }).lastrowid
                session.commit()
            except (IntegrityError, DataError):
                logger.exception(
                    'Failed adding row to target table for mailing list %s. Skipping this list.',
                    list_name)
                metrics.incr('ldap_lists_failed_to_add')
                continue

            try:
                session.execute(
                    '''INSERT INTO `mailing_list` (`target_id`, `count`) VALUES (:list_id, :count)''',
                    {
                        'list_id': list_id,
                        'count': num_members
                    })
                session.commit()
            except (IntegrityError, DataError):
                logger.exception(
                    'Failed adding row to mailing_list table for mailing list %s (ID: %s). Skipping this list.',
                    list_name, list_id)
                metrics.incr('ldap_lists_failed_to_add')
                continue

            logger.info('Created list %s with id %s', list_name, list_id)
            metrics.incr('ldap_lists_added')
            created = True

        if not created:
            session.execute(
                'UPDATE `mailing_list` SET `count` = :count WHERE `target_id` = :list_id',
                {
                    'count': num_members,
                    'list_id': list_id
                })
            session.commit()

        existing_members = {
            row[0]
            for row in session.execute(
                '''
                            SELECT `target_contact`.`destination`
                            FROM `mailing_list_membership`
                            JOIN `target_contact` ON `target_contact`.`target_id` = `mailing_list_membership`.`user_id`
                            WHERE `mailing_list_membership`.`list_id` = :list_id
                            AND `target_contact`.`mode_id` = (SELECT `id` FROM `mode` WHERE `name` = 'email')
                            ''', {'list_id': list_id})
        }

        add_members = members - existing_members
        kill_members = existing_members - members

        if add_members:
            metrics.incr('ldap_memberships_added', len(add_members))

            for member in add_members:
                try:
                    session.execute(
                        '''INSERT IGNORE INTO `mailing_list_membership`
                                       (`list_id`, `user_id`)
                                       VALUES (:list_id,
                                               (SELECT `target_id` FROM `target_contact`
                                                JOIN `target` ON `target`.`id` = `target_id`
                                                WHERE `destination` = :name
                                                AND `mode_id` = (SELECT `id` FROM `mode` WHERE `name` = 'email')
                                                AND `target`.`type_id` = (SELECT `id` FROM `target_type` WHERE `name` = 'user')))
                                    ''', {
                            'list_id': list_id,
                            'name': member
                        })
                    logger.info('Added %s to list %s', member, list_name)
                except (IntegrityError, DataError):
                    metrics.incr('ldap_memberships_failed_to_add')
                    logger.warning('Failed adding %s to %s', member, list_name)

                user_add_count += 1
                if (ldap_add_pause_interval is not None) and (
                        user_add_count % ldap_add_pause_interval) == 0:
                    logger.info('Pausing for %s seconds every %s users.',
                                ldap_add_pause_duration,
                                ldap_add_pause_interval)
                    time.sleep(ldap_add_pause_duration)

        if kill_members:
            metrics.incr('ldap_memberships_removed', len(kill_members))
            batch_remove_ldap_memberships(session, list_id, kill_members)

    session.commit()
    session.close()
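
Note: the ldap_settings dict consumed above implies a shape like the following; values are placeholders, and the search-string format is whatever get_ldap_lists and get_ldap_flat_membership expect:

# Hypothetical ldap settings, inferred from the keys accessed above.
ldap_settings = {
    'connection': {
        'url': 'ldaps://ldap.example.com',
        'bind_args': ('cn=iris,ou=svc,dc=example,dc=com', 'REDACTED'),
    },
    'search_strings': {},            # passed through to the list/membership helpers
    'max_depth': 3,                  # recursion limit for nested list expansion
    'user_add_pause_interval': 500,  # optional: pause after every N member inserts
    'user_add_pause_duration': 1,    # seconds to sleep at each pause
}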
Example #21
def sync_from_oncall(config, engine, purge_old_users=True):
    # users and teams present in our oncall database
    oncall_base_url = config.get('oncall-api')

    if not oncall_base_url:
        logger.error(
            'Missing URL to oncall-api, which we use for user/team lookups. Bailing.'
        )
        return

    oncall = oncallclient.OncallClient(config.get('oncall-app', ''),
                                       config.get('oncall-key', ''),
                                       oncall_base_url)
    oncall_users = fetch_users_from_oncall(oncall)

    if not oncall_users:
        logger.warning('No users found. Bailing.')
        return

    oncall_team_names = fetch_teams_from_oncall(oncall)

    if not oncall_team_names:
        logger.warning('We do not have a list of team names')

    oncall_team_names = set(oncall_team_names)

    session = sessionmaker(bind=engine)()

    # users present in iris' database
    iris_users = {}
    for row in engine.execute(
            '''SELECT `target`.`name` as `name`, `mode`.`name` as `mode`,
                                        `target_contact`.`destination`
                                 FROM `target`
                                 JOIN `user` on `target`.`id` = `user`.`target_id`
                                 LEFT OUTER JOIN `target_contact` ON `target`.`id` = `target_contact`.`target_id`
                                 LEFT OUTER JOIN `mode` ON `target_contact`.`mode_id` = `mode`.`id`
                                 WHERE `target`.`active` = TRUE
                                 ORDER BY `target`.`name`'''):
        contacts = iris_users.setdefault(row.name, {})
        if row.mode is None or row.destination is None:
            continue
        contacts[row.mode] = row.destination

    iris_usernames = iris_users.viewkeys()

    # users from the oncall endpoints and config files
    metrics.set('users_found', len(oncall_users))
    metrics.set('teams_found', len(oncall_team_names))
    oncall_users.update(get_predefined_users(config))
    oncall_usernames = oncall_users.viewkeys()

    # set of users not presently in iris
    users_to_insert = oncall_usernames - iris_usernames
    # set of existing iris users that are in the user oncall database
    users_to_update = iris_usernames & oncall_usernames
    users_to_mark_inactive = iris_usernames - oncall_usernames

    # get objects needed for insertion
    target_types = {
        name: id
        for name, id in session.execute(
            'SELECT `name`, `id` FROM `target_type`')
    }  # 'team' and 'user'
    modes = {
        name: id
        for name, id in session.execute('SELECT `name`, `id` FROM `mode`')
    }
    iris_team_names = {
        name
        for (name, ) in engine.execute(
            '''SELECT `name` FROM `target` WHERE `type_id` = %s''',
            target_types['team'])
    }

    target_add_sql = 'INSERT INTO `target` (`name`, `type_id`) VALUES (%s, %s) ON DUPLICATE KEY UPDATE `active` = TRUE'
    user_add_sql = 'INSERT IGNORE INTO `user` (`target_id`) VALUES (%s)'
    target_contact_add_sql = '''INSERT INTO `target_contact` (`target_id`, `mode_id`, `destination`)
                                VALUES (%s, %s, %s)
                                ON DUPLICATE KEY UPDATE `destination` = %s'''

    # insert users that need to be
    logger.info('Users to insert (%d)' % len(users_to_insert))
    for username in users_to_insert:
        logger.info('Inserting %s' % username)
        try:
            target_id = engine.execute(
                target_add_sql, (username, target_types['user'])).lastrowid
            engine.execute(user_add_sql, (target_id, ))
        except SQLAlchemyError as e:
            metrics.incr('users_failed_to_add')
            metrics.incr('sql_errors')
            logger.exception('Failed to add user %s' % username)
            continue
        metrics.incr('users_added')
        for key, value in oncall_users[username].iteritems():
            if value and key in modes:
                logger.info('%s: %s -> %s' % (username, key, value))
                engine.execute(target_contact_add_sql,
                               (target_id, modes[key], value, value))

    # update users that need to be
    contact_update_sql = 'UPDATE target_contact SET destination = %s WHERE target_id = (SELECT id FROM target WHERE name = %s) AND mode_id = %s'
    contact_insert_sql = 'INSERT INTO target_contact (target_id, mode_id, destination) VALUES ((SELECT id FROM target WHERE name = %s), %s, %s)'
    contact_delete_sql = 'DELETE FROM target_contact WHERE target_id = (SELECT id FROM target WHERE name = %s) AND mode_id = %s'

    logger.info('Users to update (%d)' % len(users_to_update))
    for username in users_to_update:
        try:
            db_contacts = iris_users[username]
            oncall_contacts = oncall_users[username]
            for mode in modes:
                if mode in oncall_contacts and oncall_contacts[mode]:
                    if mode in db_contacts:
                        if oncall_contacts[mode] != db_contacts[mode]:
                            logger.info('%s: updating %s' % (username, mode))
                            metrics.incr('user_contacts_updated')
                            engine.execute(
                                contact_update_sql,
                                (oncall_contacts[mode], username, modes[mode]))
                    else:
                        logger.info('%s: adding %s' % (username, mode))
                        metrics.incr('user_contacts_updated')
                        engine.execute(
                            contact_insert_sql,
                            (username, modes[mode], oncall_contacts[mode]))
                elif mode in db_contacts:
                    logger.info('%s: deleting %s' % (username, mode))
                    metrics.incr('user_contacts_updated')
                    engine.execute(contact_delete_sql, (username, modes[mode]))
                else:
                    logger.debug('%s: missing %s' % (username, mode))
        except SQLAlchemyError as e:
            metrics.incr('users_failed_to_update')
            metrics.incr('sql_errors')
            logger.exception('Failed to update user %s' % username)
            continue

    # sync teams between iris and oncall
    teams_to_insert = oncall_team_names - iris_team_names
    teams_to_deactivate = iris_team_names - oncall_team_names

    logger.info('Teams to insert (%d)' % len(teams_to_insert))
    for t in teams_to_insert:
        logger.info('Inserting %s' % t)
        try:
            target_id = engine.execute(target_add_sql,
                                       (t, target_types['team'])).lastrowid
            metrics.incr('teams_added')
        except SQLAlchemyError as e:
            logger.exception('Error inserting team %s: %s' % (t, e))
            metrics.incr('teams_failed_to_add')
            continue
    session.commit()
    session.close()

    # mark users/teams inactive
    if purge_old_users:
        logger.info('Users to mark inactive (%d)' %
                    len(users_to_mark_inactive))
        for username in users_to_mark_inactive:
            prune_target(engine, username, 'user')
        for team in teams_to_deactivate:
            prune_target(engine, team, 'team')
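
Note: get_predefined_users(config) merges extra users into the oncall set; from how its entries are consumed above (username -> {mode: destination}), a hypothetical shape:

# Illustrative predefined users, inferred from usage; mode names are assumptions.
predefined_users = {
    'iris-bot': {
        'email': 'iris-bot@example.com',
        'sms': '+15555550100',
        'call': '+15555550100',
    },
}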
Example #22
def sync_from_oncall(config, engine, purge_old_users=True):
    # users and teams present in our oncall database
    oncall_base_url = config.get('oncall-api')

    if not oncall_base_url:
        logger.error(
            'Missing URL to oncall-api, which we use for user/team lookups. Bailing.'
        )
        return

    oncall = oncallclient.OncallClient(config.get('oncall-app', ''),
                                       config.get('oncall-key', ''),
                                       oncall_base_url)
    oncall_users = fetch_users_from_oncall(oncall)

    if not oncall_users:
        logger.warning('No users found. Bailing.')
        return

    # get teams from oncall-api and separate the list of tuples into two lists of name and ids
    oncall_teams_api_response = fetch_teams_from_oncall(oncall)
    if not oncall_teams_api_response:
        logger.warning('No teams found. Bailing.')
        return

    oncall_team_response = list(zip(*oncall_teams_api_response))
    oncall_team_names = [name.lower() for name in oncall_team_response[0]]
    oncall_team_ids = oncall_team_response[1]
    oncall_response_dict_name_key = dict(
        zip(oncall_team_names, oncall_team_ids))
    oncall_response_dict_id_key = dict(zip(oncall_team_ids, oncall_team_names))
    oncall_case_sensitive_dict = {
        name.lower(): name
        for name in oncall_team_response[0]
    }

    if not oncall_team_names:
        logger.warning('We do not have a list of team names')

    oncall_team_names = set(oncall_team_names)
    oncall_team_ids = set(oncall_team_ids)

    session = sessionmaker(bind=engine)()

    # users present in iris' database
    iris_users = {}
    for row in engine.execute(
            '''SELECT `target`.`name` as `name`, `mode`.`name` as `mode`,
                                        `target_contact`.`destination`
                                 FROM `target`
                                 JOIN `user` on `target`.`id` = `user`.`target_id`
                                 LEFT OUTER JOIN `target_contact` ON `target`.`id` = `target_contact`.`target_id`
                                 LEFT OUTER JOIN `mode` ON `target_contact`.`mode_id` = `mode`.`id`
                                 WHERE `target`.`active` = TRUE
                                 ORDER BY `target`.`name`'''):
        contacts = iris_users.setdefault(row.name, {})
        if row.mode is None or row.destination is None:
            continue
        contacts[row.mode] = row.destination

    iris_usernames = iris_users.keys()

    # users from the oncall endpoints and config files
    metrics.set('users_found', len(oncall_users))
    metrics.set('teams_found', len(oncall_team_names))
    oncall_users.update(get_predefined_users(config))
    oncall_usernames = oncall_users.keys()

    # set of users not presently in iris
    users_to_insert = oncall_usernames - iris_usernames
    # set of existing iris users that are in the user oncall database
    users_to_update = iris_usernames & oncall_usernames
    users_to_mark_inactive = iris_usernames - oncall_usernames

    # get objects needed for insertion
    target_types = {
        name: target_id
        for name, target_id in session.execute(
            'SELECT `name`, `id` FROM `target_type`')
    }  # 'team' and 'user'
    modes = {
        name: mode_id
        for name, mode_id in session.execute('SELECT `name`, `id` FROM `mode`')
    }
    iris_team_names = {
        name.lower()
        for (name, ) in engine.execute(
            '''SELECT `name` FROM `target` WHERE `type_id` = %s''',
            target_types['team'])
    }
    target_add_sql = 'INSERT INTO `target` (`name`, `type_id`) VALUES (%s, %s) ON DUPLICATE KEY UPDATE `active` = TRUE'
    oncall_add_sql = 'INSERT INTO `oncall_team` (`target_id`, `oncall_team_id`) VALUES (%s, %s)'
    user_add_sql = 'INSERT IGNORE INTO `user` (`target_id`) VALUES (%s)'
    target_contact_add_sql = '''INSERT INTO `target_contact` (`target_id`, `mode_id`, `destination`)
                                VALUES (%s, %s, %s)
                                ON DUPLICATE KEY UPDATE `destination` = %s'''

    # insert users that need to be
    logger.info('Users to insert (%d)', len(users_to_insert))
    for username in users_to_insert:
        sleep(update_sleep)
        logger.info('Inserting %s', username)
        try:
            target_id = engine.execute(
                target_add_sql, (username, target_types['user'])).lastrowid
            engine.execute(user_add_sql, (target_id, ))
        except SQLAlchemyError as e:
            metrics.incr('users_failed_to_add')
            metrics.incr('sql_errors')
            logger.exception('Failed to add user %s', username)
            continue
        metrics.incr('users_added')
        for key, value in oncall_users[username].items():
            if value and key in modes:
                logger.info('%s: %s -> %s', username, key, value)
                try:
                    engine.execute(target_contact_add_sql,
                                   (target_id, modes[key], value, value))
                except SQLAlchemyError as e:
                    logger.exception('Error adding contact for target id: %s',
                                     target_id)
                    metrics.incr('sql_errors')
                    continue

    # update users that need to be
    contact_update_sql = 'UPDATE target_contact SET destination = %s WHERE target_id = (SELECT id FROM target WHERE name = %s AND type_id = %s) AND mode_id = %s'
    contact_insert_sql = 'INSERT INTO target_contact (target_id, mode_id, destination) VALUES ((SELECT id FROM target WHERE name = %s AND type_id = %s), %s, %s)'
    contact_delete_sql = 'DELETE FROM target_contact WHERE target_id = (SELECT id FROM target WHERE name = %s AND type_id = %s) AND mode_id = %s'

    logger.info('Users to update (%d)', len(users_to_update))
    for username in users_to_update:
        sleep(update_sleep)
        try:
            db_contacts = iris_users[username]
            oncall_contacts = oncall_users[username]
            for mode in modes:
                if mode in oncall_contacts and oncall_contacts[mode]:
                    if mode in db_contacts:
                        if oncall_contacts[mode] != db_contacts[mode]:
                            logger.info('%s: updating %s', username, mode)
                            metrics.incr('user_contacts_updated')
                            engine.execute(contact_update_sql,
                                           (oncall_contacts[mode], username,
                                            target_types['user'], modes[mode]))
                    else:
                        logger.info('%s: adding %s', username, mode)
                        metrics.incr('user_contacts_updated')
                        engine.execute(contact_insert_sql,
                                       (username, target_types['user'],
                                        modes[mode], oncall_contacts[mode]))
                elif mode in db_contacts:
                    logger.info('%s: deleting %s', username, mode)
                    metrics.incr('user_contacts_updated')
                    engine.execute(
                        contact_delete_sql,
                        (username, target_types['user'], modes[mode]))
                else:
                    logger.debug('%s: missing %s', username, mode)
        except SQLAlchemyError as e:
            metrics.incr('users_failed_to_update')
            metrics.incr('sql_errors')
            logger.exception('Failed to update user %s', username)
            continue

    # sync teams between iris and oncall
    #
    # iris_db_oncall_team_ids: team_ids in the oncall_team table
    # oncall_team_ids: team_ids from the oncall api call
    # oncall_team_names: names from the oncall api call
    # oncall_response_dict_name_key: oncall team name -> id pairs from the api call
    # oncall_response_dict_id_key: the same pairs with key and value inverted
    # oncall_case_sensitive_dict: maps the lowercased oncall name to the original capitalization
    # iris_team_names: names from the target table
    # iris_target_name_id_dict: target name -> target_id mappings
    # iris_db_oncall_team_id_name_dict: oncall team_id -> oncall name mappings

    # For each incoming name that matches an existing target, check whether that
    # target has an entry in the oncall_team table; if not, create one.
    iris_target_name_id_dict = {
        name.lower(): target_id
        for name, target_id in engine.execute(
            '''SELECT `name`, `id` FROM `target` WHERE `type_id` = %s''',
            target_types['team'])
    }

    matching_target_names = iris_team_names.intersection(oncall_team_names)
    if matching_target_names:
        existing_up_to_date_oncall_teams = {
            name.lower()
            for (name, ) in session.execute(
                '''SELECT `target`.`name` FROM `target` JOIN `oncall_team` ON `oncall_team`.`target_id` = `target`.`id` WHERE `target`.`name` IN :matching_names''',
                {'matching_names': tuple(matching_target_names)})
        }
        # up to date target names that don't have an entry in the oncall_team table yet
        matching_target_names_no_oncall_entry = matching_target_names - existing_up_to_date_oncall_teams

        for t in matching_target_names_no_oncall_entry:
            logger.info('Inserting existing team into oncall_team %s', t)
            sleep(update_sleep)
            try:
                engine.execute(
                    '''UPDATE `target` SET `active` = TRUE WHERE `id` = %s''',
                    iris_target_name_id_dict[t])
                engine.execute(oncall_add_sql,
                               (iris_target_name_id_dict[t],
                                oncall_response_dict_name_key[t]))
            except SQLAlchemyError as e:
                logger.exception('Error inserting oncall_team %s: %s', t, e)
                metrics.incr('sql_errors')
                continue


    # rename all mismatching target names
    iris_db_oncall_team_id_name_dict = {
        team_id: name.lower()
        for name, team_id in engine.execute(
            '''SELECT target.name, oncall_team.oncall_team_id FROM `target` JOIN `oncall_team` ON oncall_team.target_id = target.id'''
        )
    }

    iris_db_oncall_team_ids = {
        oncall_team_id
        for (oncall_team_id, ) in engine.execute(
            '''SELECT `oncall_team_id` FROM `oncall_team`''')
    }
    matching_oncall_ids = oncall_team_ids.intersection(iris_db_oncall_team_ids)

    name_swaps = {}

    # find teams in the iris database whose names have changed
    for oncall_id in matching_oncall_ids:

        current_name = iris_db_oncall_team_id_name_dict[oncall_id]
        new_name = oncall_response_dict_id_key[oncall_id]
        try:
            if current_name != new_name:
                # handle edge case of teams swapping names
                if not iris_target_name_id_dict.get(new_name, None):
                    target_id_to_rename = iris_target_name_id_dict[
                        current_name]
                    logger.info('Renaming team %s to %s', current_name,
                                new_name)
                    engine.execute(
                        '''UPDATE `target` SET `name` = %s, `active` = TRUE WHERE `id` = %s''',
                        (oncall_case_sensitive_dict[new_name],
                         target_id_to_rename))
                else:
                    # there is a team swap so rename to a random name to prevent a violation of unique target name constraint
                    new_name = str(uuid.uuid4())
                    target_id_to_rename = iris_target_name_id_dict[
                        current_name]
                    name_swaps[oncall_id] = target_id_to_rename
                    logger.info('Renaming team %s to %s', current_name,
                                new_name)
                    engine.execute(
                        '''UPDATE `target` SET `name` = %s, `active` = TRUE WHERE `id` = %s''',
                        (new_name, target_id_to_rename))
                sleep(update_sleep)
        except SQLAlchemyError as e:
            logger.exception('Error changing team name of %s to %s',
                             current_name, new_name)
            metrics.incr('sql_errors')

    # go back and rename name_swaps to correct value
    for oncall_id, target_id_to_rename in name_swaps.items():
        new_name = oncall_response_dict_id_key[oncall_id]
        try:
            engine.execute(
                '''UPDATE `target` SET `name` = %s, `active` = TRUE WHERE `id` = %s''',
                (oncall_case_sensitive_dict[new_name], target_id_to_rename))
        except SQLAlchemyError as e:
            logger.exception('Error renaming target: %s', new_name)
            metrics.incr('sql_errors')
            continue
        sleep(update_sleep)

    # Create new entries for new teams: if the team_id doesn't exist in
    # oncall_team at this point, it is a new team.
    new_team_ids = oncall_team_ids - iris_db_oncall_team_ids
    logger.info('Teams to insert (%d)', len(new_team_ids))

    for team_id in new_team_ids:
        t = oncall_case_sensitive_dict[oncall_response_dict_id_key[team_id]]
        new_target_id = None

        # add team to target table
        logger.info('Inserting team %s', t)
        sleep(update_sleep)
        try:
            new_target_id = engine.execute(target_add_sql,
                                           (t, target_types['team'])).lastrowid
            metrics.incr('teams_added')
        except SQLAlchemyError as e:
            logger.exception('Error inserting team %s: %s', t, e)
            metrics.incr('teams_failed_to_add')
            metrics.incr('sql_errors')
            continue

        # add team to oncall_team table
        if new_target_id:
            logger.info('Inserting new team into oncall_team %s', t)
            try:
                engine.execute(oncall_add_sql, (new_target_id, team_id))
            except SQLAlchemyError as e:
                logger.exception('Error inserting oncall_team %s: %s', t, e)
                metrics.incr('sql_errors')
                continue
    session.commit()
    session.close()

    # mark users/teams inactive
    if purge_old_users:
        # find active teams that don't exist in oncall anymore
        updated_iris_team_names = {
            name.lower()
            for (name, ) in engine.execute(
                '''SELECT `name` FROM `target` WHERE `type_id` = %s AND `active` = TRUE''',
                target_types['team'])
        }
        teams_to_deactivate = updated_iris_team_names - oncall_team_names

        logger.info('Users to mark inactive (%d)', len(users_to_mark_inactive))
        logger.info('Teams to mark inactive (%d)', len(teams_to_deactivate))
        for username in users_to_mark_inactive:
            prune_target(engine, username, 'user')
            sleep(update_sleep)
        for team in teams_to_deactivate:
            prune_target(engine, team, 'team')
            sleep(update_sleep)
Example #23
def process_retention(engine, max_days, batch_size, cooldown_time,
                      archive_path):
    time_start = time.time()

    connection = engine.raw_connection()
    cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)

    deleted_incidents = 0
    deleted_messages = 0
    deleted_comments = 0

    # First, archive/kill incidents and their messages
    while True:

        # Get incidents to archive and kill, in batches
        try:
            cursor.execute(
                '''
                    SELECT
                        %s
                    FROM `incident`
                    LEFT JOIN `plan` on `plan`.`id` = `incident`.`plan_id`
                    LEFT JOIN `application` on `application`.`id` = `incident`.`application_id`
                    LEFT JOIN `target` ON `incident`.`owner_id` = `target`.`id`
                    WHERE `incident`.`created` < (CURDATE() - INTERVAL %%s DAY)
                    LIMIT %%s
                ''' % (', '.join(field[0] for field in incident_fields)),
                [max_days, batch_size])
        except Exception:
            logger.exception('Failed getting incidents')
            try:
                cursor.close()
            except Exception:
                pass
            cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)
            break

        incident_ids = deque()

        for incident in cursor:
            archive_incident(incident, archive_path)
            incident_ids.append(incident[0])

        if not incident_ids:
            break

        logger.info('Archived %d incidents', len(incident_ids))

        # Then, Archive+Kill all comments in these incidents
        while True:

            try:
                cursor.execute(
                    '''
                        SELECT
                          %s
                        FROM `comment`
                        LEFT JOIN `target` ON `comment`.`user_id` = `target`.`id`
                        WHERE `comment`.`incident_id` in %%s
                        LIMIT %%s
                    ''' % (', '.join(field[0] for field in comment_fields)),
                    [tuple(incident_ids), batch_size])

            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed getting comments')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(
                    engine.dialect.dbapi.cursors.SSCursor)
                break

            comment_ids = deque()

            for comment in cursor:
                archive_comment(comment, archive_path)
                comment_ids.append(comment[0])

            if not comment_ids:
                break

            logger.info('Archived %d comments', len(comment_ids))

            try:
                deleted_rows = cursor.execute(
                    'DELETE FROM `comment` WHERE `id` IN %s',
                    [tuple(comment_ids)])
                connection.commit()
            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed deleting comments from incidents')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(
                    engine.dialect.dbapi.cursors.SSCursor)
                break
            else:
                if deleted_rows:
                    logger.info('Killed %d comments from %d incidents',
                                deleted_rows, len(incident_ids))
                    deleted_comments += deleted_rows
                    sleep(cooldown_time)
                else:
                    break

        # Kill all dynamic plan maps associated with these incidents
        while True:
            try:
                deleted_rows = cursor.execute(
                    'DELETE FROM `dynamic_plan_map` WHERE `incident_id` IN %s',
                    [tuple(incident_ids)])
                connection.commit()
            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed deleting dynamic plan maps')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(
                    engine.dialect.dbapi.cursors.SSCursor)
                break
            else:
                if deleted_rows:
                    # dynamic plan maps aren't messages, so log the count but
                    # don't fold it into the deleted_messages total
                    logger.info('Killed %d dynamic plan maps', deleted_rows)
                    sleep(cooldown_time)
                else:
                    break

        # Archive+Kill all messages in these incidents
        while True:

            try:
                cursor.execute(
                    '''
                        SELECT
                          %s
                        FROM `message`
                        JOIN `priority` on `priority`.`id` = `message`.`priority_id`
                        LEFT JOIN `mode` on `mode`.`id` = `message`.`mode_id`
                        LEFT JOIN `template` ON `message`.`template_id` = `template`.`id`
                        LEFT JOIN `target` ON `message`.`target_id` = `target`.`id`
                        WHERE `message`.`incident_id` in %%s
                        LIMIT %%s
                    ''' % (', '.join(field[0] for field in message_fields)),
                    [tuple(incident_ids), batch_size])

            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed getting messages')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(
                    engine.dialect.dbapi.cursors.SSCursor)
                break

            message_ids = deque()

            for message in cursor:
                archive_message(message, archive_path)
                message_ids.append(message[0])

            if not message_ids:
                break

            logger.info('Archived %d messages', len(message_ids))

            # explicitly delete all the extra message data
            try:
                cursor.execute(
                    'DELETE FROM `message_changelog` WHERE `message_id` IN %s',
                    [tuple(message_ids)])
                cursor.execute(
                    'DELETE FROM `response` WHERE `message_id` IN %s',
                    [tuple(message_ids)])
                cursor.execute(
                    'DELETE FROM `twilio_delivery_status` WHERE `message_id` IN %s',
                    [tuple(message_ids)])
                cursor.execute(
                    'DELETE FROM `twilio_retry` WHERE `message_id` IN %s',
                    [tuple(message_ids)])
                cursor.execute(
                    'DELETE FROM `generic_message_sent_status` WHERE `message_id` IN %s',
                    [tuple(message_ids)])
                connection.commit()
            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed deleting message child rows')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(
                    engine.dialect.dbapi.cursors.SSCursor)

            try:
                deleted_rows = cursor.execute(
                    'DELETE FROM `message` WHERE `id` IN %s',
                    [tuple(message_ids)])
                connection.commit()
            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed deleting messages from incidents')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(
                    engine.dialect.dbapi.cursors.SSCursor)
                # try deleting individually to directly identify any issues and prevent a single error from stopping cleanup
                deleted_rows = 0
                for msg_id in message_ids:
                    try:
                        deleted_rows += cursor.execute(
                            'DELETE FROM `message` WHERE `id`=%s', msg_id)
                        connection.commit()
                    except Exception:
                        metrics.incr('sql_errors')
                        logger.exception('Failed deleting message id: %s',
                                         msg_id)
            else:
                if deleted_rows:
                    logger.info('Killed %d messages from %d incidents',
                                deleted_rows, len(incident_ids))
                    deleted_messages += deleted_rows
                    sleep(cooldown_time)
                else:
                    break

        # Finally kill incidents
        try:
            deleted_rows = cursor.execute(
                'DELETE FROM `incident` WHERE `id` IN %s',
                [tuple(incident_ids)])
            connection.commit()
        except Exception:
            metrics.incr('sql_errors')
            logger.exception('Failed deleting incidents')
            try:
                cursor.close()
            except Exception:
                pass
            cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)
            # try deleting individually to directly identify any issues and prevent a single error from stopping cleanup
            deleted_rows = 0
            for inc_id in incident_ids:
                try:
                    deleted_rows += cursor.execute(
                        'DELETE FROM `incident` WHERE `id`=%s', inc_id)
                    connection.commit()
                except Exception:
                    metrics.incr('sql_errors')
                    logger.exception('Failed deleting incident id: %s', inc_id)
        logger.info('Deleted %s incidents', deleted_rows)
        deleted_incidents += deleted_rows
        sleep(cooldown_time)

    # Next, kill messages not tied to incidents, like quota notifs or incident tracking emails
    while True:
        try:
            deleted_rows = cursor.execute(
                'DELETE FROM `message` WHERE `created` < (CURDATE() - INTERVAL %s DAY) AND `incident_id` IS NULL LIMIT %s',
                [max_days, batch_size])
            connection.commit()
        except Exception:
            metrics.incr('sql_errors')
            logger.exception('Failed deleting messages')
            try:
                cursor.close()
            except Exception:
                pass
            break
        else:
            if deleted_rows:
                logger.info('Killed %d misc messages', deleted_rows)
                deleted_messages += deleted_rows
                sleep(cooldown_time)
            else:
                break

    try:
        cursor.close()
    except Exception:
        pass
    connection.close()

    logger.info(
        'Run took %.2f seconds and deleted %d incidents and %d messages',
        time.time() - time_start, deleted_incidents, deleted_messages)
    metrics.set('deleted_messages', deleted_messages)
    metrics.set('deleted_incidents', deleted_incidents)
    metrics.set('deleted_comments', deleted_comments)
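
(A hedged usage sketch: a retention pass like the one above is typically driven by a small config-controlled loop. The config keys and defaults below are illustrative assumptions, not the project's actual names:)

from time import sleep

from sqlalchemy import create_engine

def retention_loop(config):
    # Hypothetical driver: run one retention pass per day.
    engine = create_engine(config['db_uri'])  # assumed config key
    retention = config.get('retention', {})
    while True:
        process_retention(engine,
                          max_days=retention.get('max_days', 90),
                          batch_size=retention.get('batch_size', 1000),
                          cooldown_time=retention.get('cooldown_time', 1),
                          archive_path=retention.get('archive_path', '/var/lib/iris/archive'))
        sleep(86400)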
Example #24
def handle_api_notification_request(socket, address, req):
    notification = req['data']
    if 'application' not in notification:
        reject_api_request(socket, address, 'INVALID application')
        logger.warn('Dropping OOB message due to missing application key')
        return
    notification['subject'] = '[%s] %s' % (notification['application'],
                                           notification.get('subject', ''))
    role = notification.get('role')
    if not role:
        reject_api_request(socket, address, 'INVALID role')
        logger.warn('Dropping OOB message with invalid role "%s" from app %s',
                    role, notification['application'])
        return
    target = notification.get('target')
    if not target:
        reject_api_request(socket, address, 'INVALID target')
        logger.warn(
            'Dropping OOB message with invalid target "%s" from app %s',
            target, notification['application'])
        return
    expanded_targets = None
    # if role is literal_target skip unrolling
    if not notification.get('unexpanded'):
        try:
            expanded_targets = cache.targets_for_role(role, target)
        except IrisRoleLookupException:
            expanded_targets = None
        if not expanded_targets:
            reject_api_request(socket, address, 'INVALID role:target')
            logger.warn(
                'Dropping OOB message with invalid role:target "%s:%s" from app %s',
                role, target, notification['application'])
            return

    sanitize_unicode_dict(notification)

    # If we're rendering this using templates+context instead of body, fill in the
    # needed iris key.
    if 'template' in notification:
        if 'context' not in notification:
            logger.warn(
                'Dropping OOB message due to missing context from app %s',
                notification['application'])
            reject_api_request(socket, address, 'INVALID context')
            return
        else:
            # fill in dummy iris meta data
            notification['context']['iris'] = {}
    elif 'email_html' in notification:
        if not isinstance(notification['email_html'], basestring):
            logger.warn(
                'Dropping OOB message with invalid email_html from app %s: %s',
                notification['application'], notification['email_html'])
            reject_api_request(socket, address, 'INVALID email_html')
            return
    elif 'body' not in notification:
        reject_api_request(socket, address, 'INVALID body')
        logger.warn('Dropping OOB message with invalid body from app %s',
                    notification['application'])
        return

    access_logger.info(
        '-> %s OK, to %s:%s (%s:%s)', address, role, target,
        notification['application'],
        notification.get('priority', notification.get('mode', '?')))

    if notification.get('unexpanded'):
        notification['destination'] = notification['target']
        send_funcs['message_send_enqueue'](notification)
    else:
        for _target in expanded_targets:
            temp_notification = notification.copy()
            temp_notification['target'] = _target
            send_funcs['message_send_enqueue'](temp_notification)
    metrics.incr('notification_cnt')
    socket.sendall(msgpack.packb('OK'))
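
(For context, a client-side sketch of how a request could reach this handler. The framing, one msgpack-packed dict carrying the payload under a 'data' key, is inferred from the req['data'] access above; the endpoint name and reply handling are assumptions, not a documented protocol:)

import socket as socket_lib

import msgpack

def send_oob_notification(host, port, application, role, target, body):
    # Hypothetical client: send a single request frame, expect a packed 'OK'.
    payload = {
        'endpoint': 'v0/notification',  # assumed routing key
        'data': {
            'application': application,
            'role': role,
            'target': target,
            'body': body,
            'priority': 'low',
        },
    }
    sock = socket_lib.create_connection((host, port))
    try:
        sock.sendall(msgpack.packb(payload))
        return msgpack.unpackb(sock.recv(1024)) == 'OK'
    finally:
        sock.close()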
Example #25
File: rpc.py Project: yashrenhiet/prac
def handle_api_notification_request(socket, address, req):
    notification = req['data']
    if 'application' not in notification:
        reject_api_request(socket, address, 'INVALID application')
        logger.warning('Dropping OOB message due to missing application key')
        return
    notification['subject'] = '[%s] %s' % (notification['application'],
                                           notification.get('subject', ''))
    target_list = notification.get('target_list')
    role = notification.get('role')
    if not role and not target_list:
        reject_api_request(socket, address, 'INVALID role')
        logger.warning(
            'Dropping OOB message with invalid role "%s" from app %s', role,
            notification['application'])
        return
    target = notification.get('target')
    if not (target or target_list):
        reject_api_request(socket, address, 'INVALID target')
        logger.warning(
            'Dropping OOB message with invalid target "%s" from app %s',
            target, notification['application'])
        return
    expanded_targets = None
    # if role is literal_target skip unrolling
    if not notification.get('unexpanded'):
        # For multi-recipient notifications, pre-populate destination with literal targets,
        # then expand the remaining
        has_literal_target = False
        if target_list:
            expanded_targets = []
            notification['destination'] = []
            notification['bcc_destination'] = []
            for t in target_list:
                role = t['role']
                target = t['target']
                bcc = t.get('bcc')
                try:
                    if role == 'literal_target':
                        if bcc:
                            notification['bcc_destination'].append(target)
                        else:
                            notification['destination'].append(target)
                        has_literal_target = True
                    else:
                        expanded = cache.targets_for_role(role, target)
                        expanded_targets += [{
                            'target': e,
                            'bcc': bcc
                        } for e in expanded]
                except IrisRoleLookupException:
                    # Maintain best-effort delivery for remaining targets if one fails to resolve
                    continue
        else:
            try:
                expanded_targets = cache.targets_for_role(role, target)
            except IrisRoleLookupException:
                expanded_targets = None
        if not expanded_targets and not has_literal_target:
            reject_api_request(socket, address, 'INVALID role:target')
            logger.warning(
                'Dropping OOB message with invalid role:target "%s:%s" from app %s',
                role, target, notification['application'])
            return

    sanitize_unicode_dict(notification)

    # If we're rendering this using templates+context instead of body, fill in the
    # needed iris key.
    if 'template' in notification:
        if 'context' not in notification:
            logger.warning(
                'Dropping OOB message due to missing context from app %s',
                notification['application'])
            reject_api_request(socket, address, 'INVALID context')
            return
        else:
            # fill in dummy iris meta data
            notification['context']['iris'] = {}
    elif 'email_html' in notification:
        if not isinstance(notification['email_html'], str):
            logger.warning(
                'Dropping OOB message with invalid email_html from app %s: %s',
                notification['application'], notification['email_html'])
            reject_api_request(socket, address, 'INVALID email_html')
            return
    elif 'body' not in notification:
        reject_api_request(socket, address, 'INVALID body')
        logger.warning('Dropping OOB message with invalid body from app %s',
                       notification['application'])
        return

    access_logger.info(
        '-> %s OK, to %s:%s (%s:%s)', address, role, target,
        notification['application'],
        notification.get('priority', notification.get('mode', '?')))

    notification_count = 1
    if notification.get('unexpanded'):
        notification['destination'] = notification['target']
        send_funcs['message_send_enqueue'](notification)
    elif notification.get('multi-recipient'):
        notification['target'] = expanded_targets
        send_funcs['message_send_enqueue'](notification)
        notification_count = len(expanded_targets)
    else:
        for _target in expanded_targets:
            temp_notification = notification.copy()
            temp_notification['target'] = _target
            send_funcs['message_send_enqueue'](temp_notification)
    metrics.incr('notification_cnt', inc=notification_count)
    socket.sendall(msgpack.packb('OK'))
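
(A hedged example of the multi-recipient payload this newer handler accepts. The field names mirror the code above; the concrete values are illustrative:)

notification = {
    'application': 'myapp',
    'subject': 'disk full on db-1',
    'body': 'Root partition at 98%',
    'multi-recipient': True,
    'target_list': [
        # expanded via cache.targets_for_role(role, target)
        {'role': 'team', 'target': 'sre'},
        # literal targets skip expansion and land in destination/bcc_destination
        {'role': 'literal_target', 'target': 'alerts@example.com'},
        {'role': 'literal_target', 'target': 'audit@example.com', 'bcc': True},
    ],
}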
Example #26
def process_retention(engine, max_days, batch_size, cooldown_time, archive_path):
    time_start = time.time()

    connection = engine.raw_connection()
    cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)

    deleted_incidents = 0
    deleted_messages = 0

    # First, archive/kill incidents and their messages
    while True:

        # Get incidents to archive and kill, in batches
        try:
            cursor.execute(
                '''
                    SELECT
                        %s
                    FROM `incident`
                    LEFT JOIN `plan` on `plan`.`id` = `incident`.`plan_id`
                    LEFT JOIN `application` on `application`.`id` = `incident`.`application_id`
                    LEFT JOIN `target` ON `incident`.`owner_id` = `target`.`id`
                    WHERE `incident`.`created` < (CURDATE() - INTERVAL %%s DAY)
                    LIMIT %%s
                ''' % (', '.join(field[0] for field in incident_fields)),
                [max_days, batch_size])
        except Exception:
            logger.exception('Failed getting incidents')
            try:
                cursor.close()
            except Exception:
                pass
            cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)
            break

        incident_ids = deque()

        for incident in cursor:
            archive_incident(incident, archive_path)
            incident_ids.append(incident[0])

        if not incident_ids:
            break

        logger.info('Archived %d incidents', len(incident_ids))

        # Archive+Kill all messages in these incidents
        while True:

            try:
                cursor.execute(
                    '''
                        SELECT
                          %s
                        FROM `message`
                        JOIN `priority` on `priority`.`id` = `message`.`priority_id`
                        LEFT JOIN `mode` on `mode`.`id` = `message`.`mode_id`
                        LEFT JOIN `template` ON `message`.`template_id` = `template`.`id`
                        LEFT JOIN `target` ON `message`.`target_id` = `target`.`id`
                        WHERE `message`.`incident_id` in %%s
                        LIMIT %%s
                    ''' % (', '.join(field[0] for field in message_fields)),
                    [tuple(incident_ids), batch_size])

            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed getting messages')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)
                break

            message_ids = deque()

            for message in cursor:
                archive_message(message, archive_path)
                message_ids.append(message[0])

            if not message_ids:
                break

            logger.info('Archived %d messages', len(message_ids))

            try:
                deleted_rows = cursor.execute('DELETE FROM `message` WHERE `id` IN %s', [tuple(message_ids)])
                connection.commit()
            except Exception:
                metrics.incr('sql_errors')
                logger.exception('Failed deleting messages from incidents')
                try:
                    cursor.close()
                except Exception:
                    pass
                cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)
                break
            else:
                if deleted_rows:
                    logger.info('Killed %d messages from %d incidents', deleted_rows, len(incident_ids))
                    deleted_messages += deleted_rows
                    sleep(cooldown_time)
                else:
                    break

        # Finally kill incidents
        try:
            deleted_rows = cursor.execute('DELETE FROM `incident` WHERE `id` IN %s', [tuple(incident_ids)])
            connection.commit()
        except Exception:
            metrics.incr('sql_errors')
            logger.exception('Failed deleting incidents')
            try:
                cursor.close()
            except Exception:
                pass
            cursor = connection.cursor(engine.dialect.dbapi.cursors.SSCursor)
            break
        else:
            logger.info('Deleted %s incidents', deleted_rows)
            deleted_incidents += deleted_rows
            sleep(cooldown_time)

    # Next, kill messages not tied to incidents, like quota notifs or incident tracking emails
    while True:
        try:
            deleted_rows = cursor.execute('DELETE FROM `message` WHERE `created` < (CURDATE() - INTERVAL %s DAY) AND `incident_id` IS NULL LIMIT %s', [max_days, batch_size])
            connection.commit()
        except Exception:
            metrics.incr('sql_errors')
            logger.exception('Failed deleting messages')
            try:
                cursor.close()
            except Exception:
                pass
            break
        else:
            if deleted_rows:
                logger.info('Killed %d misc messages', deleted_rows)
                deleted_messages += deleted_rows
                sleep(cooldown_time)
            else:
                break

    try:
        cursor.close()
    except Exception:
        pass
    connection.close()

    logger.info('Run took %.2f seconds and deleted %d incidents and %d messages', time.time() - time_start, deleted_incidents, deleted_messages)
    metrics.set('deleted_messages', deleted_messages)
    metrics.set('deleted_incidents', deleted_incidents)
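
(Both retention variants call archive_incident/archive_message before deleting, but those helpers aren't shown. A minimal sketch under the assumption that each row is archived as a line of JSON beneath archive_path; the real implementation may differ:)

import json
import os

def archive_incident(incident, archive_path):
    # Hypothetical sketch: append the joined incident row as one JSON line.
    # default=str copes with the datetime columns from the SELECT above.
    path = os.path.join(archive_path, 'incidents.jsonl')
    with open(path, 'a') as fh:
        fh.write(json.dumps(list(incident), default=str) + '\n')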
Example #27
File: owasync.py Project: p6knewman/iris
def relay(message, iris_client):
    if message.headers is None:
        logger.info('Ignoring message with no headers %s (from %s)',
                    message.message_id, message.sender.email_address)
        metrics.incr('message_ignore_count')
        return

    # Get headers into the same format iris expects from the gmail relay
    headers = [{
        'name': header.name,
        'value': header.value
    } for header in message.headers]

    # If this is a bulk message or an auto reply or something else, don't bother sending it to iris-api
    if is_pointless_message(headers):
        logger.info('Not relaying pointless message %s (from %s) to iris-api',
                    message.message_id, message.sender.email_address)
        metrics.incr('message_ignore_count')
        return

    # To and From headers are strangely missing, so reconstruct them here
    if message.to_recipients:
        headers.append({
            'name': 'To',
            'value': message.to_recipients[0].email_address
        })
    headers.append({'name': 'From', 'value': message.sender.email_address})

    data = {'headers': headers, 'body': message.text_body.strip()}

    try:
        req = iris_client.post('response/email', json=data)
    except requests.exceptions.RequestException:
        metrics.incr('message_relay_failure_count')
        logger.exception('Failed posting message %s (from %s) to iris-api',
                         message.message_id, message.sender.email_address)
        return

    code_type = req.status_code // 100

    if code_type == 5:
        metrics.incr('message_relay_failure_count')
        logger.error(
            'Failed posting message %s (from %s) to iris-api. Got status code %s and response %s',
            message.message_id, message.sender.email_address, req.status_code,
            req.text)

    elif code_type == 4:
        metrics.incr('malformed_message_count')
        logger.error(
            'Failed posting message %s (from %s) to iris-api. Message likely malformed. Status code: %s. Response: %s',
            message.message_id, message.sender.email_address, req.status_code,
            req.text)

    elif code_type == 2:
        metrics.incr('message_relay_success_count')

        # If we create an incident using an email, this header will be set and will be the numeric ID
        # of the created incident; otherwise, the header will not exist or it will be a textual
        # error message.
        incident_header = req.headers.get('X-IRIS-INCIDENT')
        if isinstance(incident_header,
                      basestring) and incident_header.isdigit():
            metrics.incr('incident_created_count')

    else:
        logger.error(
            'Failed posting message %s (from %s) to iris-api. Message likely malformed. Got back strange status code: %s. Response: %s',
            message.message_id, message.sender.email_address, req.status_code,
            req.text)
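
(A hedged sketch of the polling loop that would feed relay(). It assumes exchangelib, whose Message objects expose the headers/sender/text_body attributes used above, plus config keys that are purely illustrative:)

from exchangelib import DELEGATE, Account, Credentials

def poll_inbox(config, iris_client):
    # Hypothetical driver: relay each unread message, then mark it read so it
    # isn't relayed again on the next pass.
    credentials = Credentials(config['owa_user'], config['owa_pass'])  # assumed keys
    account = Account(primary_smtp_address=config['owa_address'],
                      credentials=credentials,
                      autodiscover=True,
                      access_type=DELEGATE)
    for message in account.inbox.filter(is_read=False):
        relay(message, iris_client)
        message.is_read = True
        message.save(update_fields=['is_read'])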
Example #28
File: sender.py Project: shoxv/iris
def fetch_and_send_message():
    message = send_queue.get()

    has_contact = set_target_contact(message)
    if not has_contact:
        mark_message_has_no_contact(message)
        return

    if 'message_id' not in message:
        message['message_id'] = None

    drop_mode_id = api_cache.modes.get('drop')

    # If this app breaches hard quota, drop message on floor, and update in UI if it has an ID
    if not quota.allow_send(message):
        logger.warn(
            'Hard message quota exceeded; Dropping this message on floor: %s',
            message)
        if message['message_id']:
            spawn(auditlog.message_change, message['message_id'],
                  auditlog.MODE_CHANGE, message.get('mode', '?'), 'drop',
                  'Dropping due to hard quota violation.')

            # If we know the ID for the mode drop, reflect that for the message
            if drop_mode_id:
                message['mode'] = 'drop'
                message['mode_id'] = drop_mode_id
            else:
                logger.error(
                    'Can\'t mark message %s as dropped as we don\'t know the mode ID for %s',
                    message, 'drop')

            # Render, so we're able to populate the message table with the proper subject/etc as well as
            # information that it was dropped.
            render(message)
            mark_message_as_sent(message)
        return

    # If we're set to drop this message, no-op this before message gets sent to a vendor
    if message.get('mode') == 'drop':
        if message['message_id']:
            render(message)
            mark_message_as_sent(message)
        add_mode_stat('drop', 0)

        metrics_key = 'app_%(application)s_mode_drop_cnt' % message
        metrics.add_new_metrics({metrics_key: 0})
        metrics.incr(metrics_key)

        return

    render(message)

    # If the body is too long, drop this message and mark it as dropped
    # rather than sending it.
    body_length = len(message['body'])
    if body_length > MAX_MESSAGE_BODY_LENGTH:
        logger.warn(
            'Message id %s has a ridiculously long body (%s chars). Dropping it.',
            message['message_id'], body_length)
        spawn(
            auditlog.message_change, message['message_id'],
            auditlog.MODE_CHANGE, message.get('mode', '?'), 'drop',
            'Dropping due to excessive body length (%s > %s chars)' %
            (body_length, MAX_MESSAGE_BODY_LENGTH))

        metrics.incr('msg_drop_length_cnt')

        # Truncate this here to avoid a duplicate log message in mark_message_as_sent(), as we still need to call
        # that to update the body/subject
        message['body'] = message['body'][:MAX_MESSAGE_BODY_LENGTH]

        if drop_mode_id:
            message['mode'] = 'drop'
            message['mode_id'] = drop_mode_id

        mark_message_as_sent(message)
        return

    success = None
    try:
        success = distributed_send_message(message)
    except Exception:
        logger.exception('Failed to send message: %s', message)
        if message['mode'] == 'email':
            logger.error(
                'unable to send %(mode)s %(message_id)s %(application)s %(destination)s %(subject)s %(body)s',
                message)
            metrics.incr('task_failure')
        else:
            logger.error(
                'reclassifying as email %(mode)s %(message_id)s %(application)s %(destination)s %(subject)s %(body)s',
                message)
            old_mode = message['mode']
            if set_target_fallback_mode(message):
                update_message_mode(message)
                auditlog.message_change(
                    message['message_id'], auditlog.MODE_CHANGE, old_mode,
                    message['mode'],
                    'Changing mode due to original mode failure')
            render(message)
            try:
                success = distributed_send_message(message)
            # nope - log and bail
            except Exception:
                metrics.incr('task_failure')
                logger.error(
                    'unable to send %(mode)s %(message_id)s %(application)s %(destination)s %(subject)s %(body)s',
                    message)
    if success:
        metrics.incr('message_send_cnt')
        if message['message_id']:
            mark_message_as_sent(message)

    if message['message_id']:
        update_message_sent_status(message, success)
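
(fetch_and_send_message() handles exactly one item from send_queue, so the worker greenlets spawned in Example #29 presumably wrap it in a loop; a minimal sketch:)

def worker():
    # Hypothetical: block on send_queue.get() inside fetch_and_send_message()
    # forever; if this greenlet dies, the main loop notices and respawns it.
    while True:
        fetch_and_send_message()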
Example #29
def main():
    global config
    config = load_config()

    start_time = time.time()

    logger.info('[-] bootstrapping sender...')
    init_sender(config)
    init_plugins(config.get('plugins', {}))
    init_vendors(config.get('vendors', []), config.get('applications', []))

    send_task = spawn(send)
    worker_tasks = [spawn(worker) for x in xrange(100)]

    rpc.init(config['sender'], dict(send_message=send_message))
    rpc.run(config['sender'])

    spawn(coordinator.update_forever)

    gwatch_renewer_task = None
    prune_audit_logs_task = None

    interval = 60
    logger.info('[*] sender bootstrapped')
    while True:
        runtime = int(time.time())
        logger.info('--> sender loop started.')

        cache.refresh()
        cache.purge()

        # If we're currently a master, ensure our master-greenlets are running
        # and we're doing the master duties
        if coordinator.am_i_master():
            if not bool(gwatch_renewer_task):
                if should_mock_gwatch_renewer:
                    gwatch_renewer_task = spawn(mock_gwatch_renewer)
                else:
                    gwatch_renewer_task = spawn(gwatch_renewer)

            if not bool(prune_audit_logs_task):
                prune_audit_logs_task = spawn(prune_old_audit_logs_worker)

            try:
                escalate()
                deactivate()
                poll()
                aggregate(runtime)
            except Exception:
                metrics.incr('task_failure')
                logger.exception("Exception occured in main loop.")

        # If we're not master, don't do the master tasks and make sure those other
        # greenlets are stopped if they're running
        else:
            logger.info('I am not the master so I am not doing master sender tasks.')

            # Stop these task greenlets if they're running. Technically this should
            # never happen because if we're the master, we'll likely only stop being the
            # master if our process exits, which would kill these greenlets anyway.
            if bool(gwatch_renewer_task):
                logger.info('I am not master anymore so stopping the gwatch renewer')
                gwatch_renewer_task.kill()

            if bool(prune_audit_logs_task):
                logger.info('I am not master anymore so stopping the audit logs worker')
                prune_audit_logs_task.kill()

        # check status of all background greenlets and respawn if necessary
        # (a gevent greenlet is falsy once it has died)
        if not bool(send_task):
            logger.error("send task failed, %s", send_task.exception)
            metrics.incr('task_failure')
            send_task = spawn(send)
        bad_workers = []
        for i, task in enumerate(worker_tasks):
            if not bool(task):
                logger.error("worker task failed, %s", task.exception)
                metrics.incr('task_failure')
                bad_workers.append(i)
        for i in bad_workers:
            worker_tasks[i] = spawn(worker)

        now = time.time()
        metrics.set('sender_uptime', int(now - start_time))

        spawn(metrics.emit)

        elapsed_time = now - runtime
        nap_time = max(0, interval - elapsed_time)
        logger.info('--> sender loop finished in %s seconds - sleeping %s seconds',
                    elapsed_time, nap_time)
        sleep(nap_time)
Example #30
File: sender.py Project: shoxv/iris
def create_messages(incident_id, plan_notification_id):
    application_id = cache.incidents[incident_id]['application_id']
    plan_notification = cache.plan_notifications[plan_notification_id]
    role = cache.roles[plan_notification['role_id']]['name']
    target = cache.targets[plan_notification['target_id']]['name']
    # find role/priority from plan_notification_id
    names = cache.targets_for_role(role, target)
    priority_id = plan_notification['priority_id']
    changed_target = False
    body = ''

    if not names:
        metrics.incr('role_target_lookup_error')

        # Try to get creator of the plan and nag them instead
        name = None
        try:
            name = cache.plans[plan_notification['plan_id']]['creator']
        except (KeyError, TypeError):
            pass

        if not name:
            logger.error((
                'Failed to find targets for incident %s, plan_notification_id: %s, role: %s, target: %s, result: %s and failed looking '
                'up the plan\'s creator'), incident_id, plan_notification_id,
                         role, target, names)
            return False

        try:
            priority_id = api_cache.priorities['low']['id']
        except KeyError:
            logger.error((
                'Failed to find targets for incident %s, plan_notification_id: %s, role: %s, target: %s, result: %s and failed looking '
                'up ID for low priority'), incident_id, plan_notification_id,
                         role, target, names)
            return False

        logger.error((
            'Failed to find targets for incident %s, plan_notification_id: %s, role: %s, target: %s, result: %s. '
            'Reaching out to %s instead and lowering priority to low (%s)'),
                     incident_id, plan_notification_id, role, target, names,
                     name, priority_id)

        body = 'You are receiving this as you created this plan and we can\'t resolve %s of %s at this time.\n\n' % (
            role, target)
        names = [name]
        changed_target = True

    connection = db.engine.raw_connection()
    cursor = connection.cursor()

    for name in names:
        t = cache.target_names[name]
        if t:
            target_id = t['id']
            cursor.execute(
                INSERT_MESSAGE_SQL,
                (plan_notification['plan_id'], plan_notification_id,
                 incident_id, application_id, target_id, priority_id, body))

            if changed_target:
                # commit so the lastrowid exists in the DB to satisfy the
                # audit log's constraint
                connection.commit()
                auditlog.message_change(
                    cursor.lastrowid, auditlog.TARGET_CHANGE,
                    role + '|' + target, name,
                    'Changing target as we failed resolving original target')

        else:
            metrics.incr('target_not_found')
            logger.error('No target found: %s', name)

    connection.commit()
    cursor.close()
    connection.close()
    return True
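
(INSERT_MESSAGE_SQL isn't shown in this example. Given the seven values bound above (plan_id, plan_notification_id, incident_id, application_id, target_id, priority_id, body), it is presumably something like the following; treat the exact column list as an assumption:)

INSERT_MESSAGE_SQL = '''
    INSERT INTO `message` (`created`, `plan_id`, `plan_notification_id`,
                           `incident_id`, `application_id`, `target_id`,
                           `priority_id`, `body`)
    VALUES (NOW(), %s, %s, %s, %s, %s, %s, %s)
'''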