Exemplo n.º 1
0
    def update_rule(self):
        """
        Adds or removes the rule for the block.
        """

        rules = list_replication_rules(filters={'scope': self.scope, 'name': self.block_name})
        # rules = self.rcli.list_did_rules(scope=self.scope, name=self.block_name)
        rse_expression = 'rse=' + self.rse

        remove_rules = [rule for rule in rules
                        if rule['account'] == self.account and rule['rse_expression'] == rse_expression]

        if not remove_rules and self.is_at_pnn:
            self.rule_exists = False
            if self.dry_run:
                logging.info("Dry run: Adding rule for dataset %s at rse %s.", self.block_name, self.rse)
            else:
                self.add_replication_rule_with_defaults(dids=[{'scope': self.scope, 'name': self.block_name}],
                                                        copies=1, rse_expression=rse_expression, account=self.account)
                monitor.record_counter('cms_sync.rules_added')
            self.rule_exists = True
        elif remove_rules and not self.is_at_pnn:
            self.rule_exists = True
            if self.dry_run:
                logging.info("Removing rules for dataset %s at rse %s.", self.block_name, self.rse)
            else:
                for rule in remove_rules:
                    # delete_replication_rule(rule['id'], purge_replicas=False, issuer=self.account)
                    delete_rule(rule_id=rule['id'], purge_replicas=True, soft=False)
                    monitor.record_counter('cms_sync.rules_removed')
            self.rule_exists = False
Exemplo n.º 2
0
    def test_bb8_rebalance_rule(self):
        """ BB8: Test the rebalance rule method"""
        scope = InternalScope('mock', **self.vo)
        files = create_files(3, scope, self.rse1_id)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
        attach_dids(scope, dataset, files, self.jdoe)

        rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account=self.jdoe, copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]

        rule = {}
        try:
            rule = get_rule(rule_id)
        except:
            assert_raises(RuleNotFound, get_rule, rule_id)
        child_rule = rebalance_rule(rule, 'Rebalance', self.rse3, priority=3)

        rule_cleaner(once=True)

        assert(get_rule(rule_id)['expires_at'] <= datetime.utcnow())
        assert(get_rule(rule_id)['child_rule_id'] == child_rule)

        rule_cleaner(once=True)

        assert(get_rule(rule_id)['expires_at'] <= datetime.utcnow())

        successful_transfer(scope=scope, name=files[0]['name'], rse_id=self.rse3_id, nowait=False)
        successful_transfer(scope=scope, name=files[1]['name'], rse_id=self.rse3_id, nowait=False)
        with assert_raises(UnsupportedOperation):
            delete_rule(rule_id)
        successful_transfer(scope=scope, name=files[2]['name'], rse_id=self.rse3_id, nowait=False)

        rule_cleaner(once=True)
        assert(get_rule(child_rule)['state'] == RuleState.OK)
Exemplo n.º 3
0
    def cleanup(self, session=None):
        if not self.created_dids:
            return

        # Cleanup Transfers
        session.query(models.Source).filter(or_(and_(models.Source.scope == did['scope'],
                                                     models.Source.name == did['name'])
                                                for did in self.created_dids)).delete(synchronize_session=False)
        session.query(models.Request).filter(or_(and_(models.Request.scope == did['scope'],
                                                      models.Request.name == did['name'])
                                                 for did in self.created_dids)).delete(synchronize_session=False)

        # Cleanup Locks Rules
        query = session.query(models.ReplicationRule.id).filter(or_(and_(models.ReplicationRule.scope == did['scope'],
                                                                         models.ReplicationRule.name == did['name'])
                                                                    for did in self.created_dids))
        for rule_id, in query:
            rule_core.delete_rule(rule_id, session=session)

        # Cleanup Replicas and Parent Datasets
        dids_by_rse = {}
        replicas = list(replica_core.list_replicas(self.created_dids, all_states=True, session=session))
        for replica in replicas:
            for rse_id in replica['rses']:
                dids_by_rse.setdefault(rse_id, []).append({'scope': replica['scope'], 'name': replica['name']})
        for rse_id, dids in dids_by_rse.items():
            replica_core.delete_replicas(rse_id=rse_id, files=dids, session=session)
Exemplo n.º 4
0
def delete_replication_rule(rule_id,
                            purge_replicas,
                            issuer,
                            vo='def',
                            session=None):
    """
    Deletes a replication rule and all associated locks.

    :param rule_id:        The id of the rule to be deleted
    :param purge_replicas: Purge the replicas immediately
    :param issuer:         The issuing account of this operation
    :param vo:             The VO to act on.
    :param session:        The database session in use.
    :raises:               RuleNotFound, AccessDenied
    """
    kwargs = {'rule_id': rule_id, 'purge_replicas': purge_replicas}
    if is_multi_vo(
            session=session) and not has_permission(issuer=issuer,
                                                    vo=vo,
                                                    action='access_rule_vo',
                                                    kwargs=kwargs,
                                                    session=session):
        raise AccessDenied('Account %s can not access rules at other VOs.' %
                           (issuer))
    if not has_permission(
            issuer=issuer, vo=vo, action='del_rule', kwargs=kwargs):
        raise AccessDenied('Account %s can not remove this replication rule.' %
                           (issuer))
    rule.delete_rule(rule_id=rule_id,
                     purge_replicas=purge_replicas,
                     soft=True,
                     session=session)
Exemplo n.º 5
0
def test_bb8_rebalance_rule(vo, root_account, jdoe_account, rse_factory, mock_scope, did_factory):
    """BB8: Test the rebalance rule method"""
    rse1, rse1_id = rse_factory.make_posix_rse()
    rse2, rse2_id = rse_factory.make_posix_rse()

    # Add Tags
    T1 = tag_generator()
    T2 = tag_generator()
    add_rse_attribute(rse1_id, T1, True)
    add_rse_attribute(rse2_id, T2, True)

    # Add fake weights
    add_rse_attribute(rse1_id, "fakeweight", 10)
    add_rse_attribute(rse2_id, "fakeweight", 0)

    # Add quota
    set_local_account_limit(jdoe_account, rse1_id, -1)
    set_local_account_limit(jdoe_account, rse2_id, -1)

    set_local_account_limit(root_account, rse1_id, -1)
    set_local_account_limit(root_account, rse2_id, -1)

    files = create_files(3, mock_scope, rse1_id)
    dataset = did_factory.make_dataset()
    attach_dids(mock_scope, dataset['name'], files, jdoe_account)
    set_status(mock_scope, dataset['name'], open=False)

    # Invalid the cache because the result of parse_expression is cached
    REGION.invalidate()

    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dataset['name']}], account=jdoe_account, copies=1, rse_expression=rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
    rule = {}
    try:
        rule = get_rule(rule_id)
    except:
        pytest.raises(RuleNotFound, get_rule, rule_id)
    child_rule = rebalance_rule(rule, 'Rebalance', rse2, priority=3)

    rule_cleaner(once=True)

    assert(get_rule(rule_id)['expires_at'] <= datetime.utcnow())
    assert(get_rule(rule_id)['child_rule_id'] == child_rule)

    rule_cleaner(once=True)

    assert(get_rule(rule_id)['expires_at'] <= datetime.utcnow())

    successful_transfer(scope=mock_scope, name=files[0]['name'], rse_id=rse2_id, nowait=False)
    successful_transfer(scope=mock_scope, name=files[1]['name'], rse_id=rse2_id, nowait=False)
    with pytest.raises(UnsupportedOperation):
        delete_rule(rule_id)
    successful_transfer(scope=mock_scope, name=files[2]['name'], rse_id=rse2_id, nowait=False)

    rule_cleaner(once=True)
    assert(get_rule(child_rule)['state'] == RuleState.OK)
    set_metadata(mock_scope, dataset['name'], 'lifetime', -86400)
    undertaker.run(once=True)
Exemplo n.º 6
0
 def __cleanup_locks_and_rules(self, session=None):
     query = session.query(models.ReplicationRule.id).filter(
         or_(
             and_(models.ReplicationRule.scope == did['scope'],
                  models.ReplicationRule.name == did['name'])
             for did in self.created_dids))
     for rule_id, in query:
         rule_core.delete_rule(rule_id,
                               session=session,
                               ignore_rule_lock=True)
Exemplo n.º 7
0
def delete_replication_rule(rule_id, purge_replicas, issuer):
    """
    Deletes a replication rule and all associated locks.

    :param rule_id:  The id of the rule to be deleted
    :param issuer:   The issuing account of this operation
    :raises:         RuleNotFound, AccessDenied
    """
    kwargs = {'rule_id': rule_id, 'purge_replicas': purge_replicas}
    if not has_permission(issuer=issuer, action='del_rule', kwargs=kwargs):
        raise AccessDenied('Account %s can not remove this replication rule.' % (issuer))
    rule.delete_rule(rule_id=rule_id, purge_replicas=purge_replicas, soft=True)
Exemplo n.º 8
0
def delete_replication_rule(rule_id, issuer):
    """
    Deletes a replication rule and all associated locks.

    :param rule_id:  The id of the rule to be deleted
    :param issuer:   The issuing account of this operation
    :raises:         RuleNotFound, AccessDenied
    """
    kwargs = {'rule_id': rule_id}
    if not has_permission(issuer=issuer, action='del_rule', kwargs=kwargs):
        raise AccessDenied('Account %s can not remove this replication rule.' % (issuer))
    rule.delete_rule(rule_id)
Exemplo n.º 9
0
    def test_locked_rule(self):
        """ REPLICATION RULE (CLIENT): Delete a locked replication rule"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=True, subscription_id=None)[0]

        assert_raises(AccessDenied, delete_rule, rule_id_1)
        self.rule_client.update_replication_rule(rule_id=rule_id_1, options={'locked': False})
        delete_rule(rule_id=rule_id_1)
Exemplo n.º 10
0
    def test_delete_rule(self):
        """ REPLICATION RULE (CORE): Test to delete a previously created rule"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
        delete_rule(rule_id)
        for file in files:
            rse_locks = get_replica_locks(scope=file['scope'], name=file['name'])
            assert(len(rse_locks) == 0)
        assert_raises(RuleNotFound, delete_rule, uuid())
Exemplo n.º 11
0
    def test_add_rule_with_purge(self):
        """ REPLICATION RULE (CORE): Add a replication rule with purge setting"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse4, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None, purge_replicas=True)[0]

        delete_rule(rule_id)

        # Check if the Locks are created properly
        for file in files:
            replica = get_replica(rse=self.rse4, scope=file['scope'], name=file['name'])
            assert(replica['tombstone'] == OBSOLETE)
Exemplo n.º 12
0
    def test_delete_rule_and_cancel_transfers(self):
        """ REPLICATION RULE (CORE): Test to delete a previously created rule and do not cancel overlapping transfers"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
        add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
        add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=3, rse_expression=self.T1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]

        delete_rule(rule_id_1)

        for file in files:
            rse_locks = get_replica_locks(scope=file['scope'], name=file['name'])
            assert(len(rse_locks) == 5)
            # TODO Need to check transfer queue here, this is actually not the check of this test case
        assert_raises(RuleNotFound, delete_rule, uuid())
Exemplo n.º 13
0
    def test_account_counter_rule_delete(self):
        """ REPLICATION RULE (CORE): Test if the account counter is updated correctly when a rule is removed"""

        scope = 'mock'
        files = create_files(3, scope, self.rse1, bytes=100)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')

        rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)[0]

        account_update(once=True)
        account_counter_before = get_account_counter(self.rse1_id, 'jdoe')

        delete_rule(rule_id)
        account_update(once=True)

        # Check if the counter has been updated correctly
        account_counter_after = get_account_counter(self.rse1_id, 'jdoe')
        assert(account_counter_before['bytes'] - 3*100 == account_counter_after['bytes'])
        assert(account_counter_before['files'] - 3 == account_counter_after['files'])
Exemplo n.º 14
0
def delete_sync_rule(rule_id, session=None):
    rule = get_rule(rule_id, session=session)
    if rule["did_type"] != DIDType.DATASET:
        raise RuntimeError("Rule applies to did with wrong type")
    block = rule["name"]
    try:
        rse_id = get_rse_id(rse=rule["rse_expression"], session=session)
    except RSENotFound:
        raise RuntimeError("Rule does not apply to a specific RSE")
    scope = rule["scope"]
    account = rule["account"]

    files = []
    for file in list_files(scope, block, long=False, session=session):
        files.append(
            {"scope": scope, "name": file["name"], "rse_id": rse_id, "state": "U"}
        )

    update_replicas_states(
        replicas=files, add_tombstone=False, session=session
    )
    delete_rule(rule_id=rule_id, purge_replicas=True, soft=False, session=session)
Exemplo n.º 15
0
    def cleanup(self, session=None):
        if not self.created_rses:
            return
        # Cleanup Transfers
        session.query(models.Source).filter(or_(models.Source.dest_rse_id.in_(self.created_rses),
                                                models.Source.rse_id.in_(self.created_rses))).delete(synchronize_session=False)
        session.query(models.Request).filter(or_(models.Request.dest_rse_id.in_(self.created_rses),
                                                 models.Request.source_rse_id.in_(self.created_rses))).delete(synchronize_session=False)

        # Cleanup Locks and Rules
        query = session.query(models.ReplicationRule.id). \
            join(models.ReplicaLock, models.ReplicationRule.id == models.ReplicaLock.rule_id). \
            filter(models.ReplicaLock.rse_id.in_(self.created_rses)).distinct()
        for rule_id, in query:
            rule_core.delete_rule(rule_id, session=session)

        # Cleanup Replicas and Parent Datasets
        query = session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.rse_id). \
            filter(models.RSEFileAssociation.rse_id.in_(self.created_rses))
        dids_by_rse = {}
        for scope, name, rse_id in query:
            dids_by_rse.setdefault(rse_id, []).append({'scope': scope, 'name': name})
        for rse_id, dids in dids_by_rse.items():
            replica_core.delete_replicas(rse_id=rse_id, files=dids, session=session)

        # Cleanup RSEs
        for model in (models.RSEAttrAssociation, models.RSEProtocols, models.UpdatedRSECounter,
                      models.RSEUsage, models.RSELimit, models.RSETransferLimit, models.RSEQoSAssociation):
            session.query(model).filter(model.rse_id.in_(self.created_rses)).delete(synchronize_session=False)

        session.query(models.Distance).filter(or_(models.Distance.src_rse_id.in_(self.created_rses),
                                                  models.Distance.dest_rse_id.in_(self.created_rses))).delete(synchronize_session=False)
        for rse_id in self.created_rses:
            # Only archive RSE instead of deleting. Account handling code doesn't expect RSEs to ever be deleted.
            # So running test in parallel results in some tests failing on foreign key errors.
            rse_core.del_rse(rse_id, session=session)
Exemplo n.º 16
0
def rule_cleaner(once=False):
    """
    Main loop to check for expired replication rules
    """

    hostname = socket.gethostname()
    pid = os.getpid()
    current_thread = threading.current_thread()

    paused_rules = {}  # {rule_id: datetime}

    # Make an initial heartbeat so that all judge-cleaners have the correct worker number on the next try
    live(executable='rucio-judge-cleaner', hostname=hostname, pid=pid, thread=current_thread)
    graceful_stop.wait(1)

    while not graceful_stop.is_set():
        try:
            # heartbeat
            heartbeat = live(executable='rucio-judge-cleaner', hostname=hostname, pid=pid, thread=current_thread)

            start = time.time()

            # Refresh paused rules
            iter_paused_rules = deepcopy(paused_rules)
            for key in iter_paused_rules:
                if datetime.utcnow() > paused_rules[key]:
                    del paused_rules[key]

            rules = get_expired_rules(total_workers=heartbeat['nr_threads'] - 1,
                                      worker_number=heartbeat['assign_thread'],
                                      limit=200,
                                      blacklisted_rules=[key for key in paused_rules])
            logging.debug('rule_cleaner[%s/%s] index query time %f fetch size is %d' % (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1, time.time() - start, len(rules)))

            if not rules and not once:
                logging.debug('rule_cleaner[%s/%s] did not get any work (paused_rules=%s)' % (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1, str(len(paused_rules))))
                graceful_stop.wait(60)
            else:
                for rule in rules:
                    rule_id = rule[0]
                    rule_expression = rule[1]
                    logging.info('rule_cleaner[%s/%s]: Deleting rule %s with expression %s' % (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1, rule_id, rule_expression))
                    if graceful_stop.is_set():
                        break
                    try:
                        start = time.time()
                        delete_rule(rule_id=rule_id, nowait=True)
                        logging.debug('rule_cleaner[%s/%s]: deletion of %s took %f' % (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1, rule_id, time.time() - start))
                    except (DatabaseException, DatabaseError, UnsupportedOperation), e:
                        if match('.*ORA-00054.*', str(e.args[0])):
                            paused_rules[rule_id] = datetime.utcnow() + timedelta(seconds=randint(600, 2400))
                            record_counter('rule.judge.exceptions.LocksDetected')
                            logging.warning('rule_cleaner[%s/%s]: Locks detected for %s' % (heartbeat['assign_thread'], heartbeat['nr_threads'] - 1, rule_id))
                        elif match('.*QueuePool.*', str(e.args[0])):
                            logging.warning(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        elif match('.*ORA-03135.*', str(e.args[0])):
                            logging.warning(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        else:
                            logging.error(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                    except RuleNotFound, e:
                        pass
Exemplo n.º 17
0
def rule_cleaner(once=False):
    """
    Main loop to check for expired replication rules
    """

    hostname = socket.gethostname()
    pid = os.getpid()
    current_thread = threading.current_thread()

    paused_rules = {}  # {rule_id: datetime}

    # Make an initial heartbeat so that all judge-cleaners have the correct worker number on the next try
    executable = 'judge-cleaner'
    heartbeat = live(executable=executable,
                     hostname=hostname,
                     pid=pid,
                     thread=current_thread)
    prefix = 'judge-cleaner[%i/%i] ' % (heartbeat['assign_thread'],
                                        heartbeat['nr_threads'])
    logger = formatted_logger(logging.log, prefix + '%s')
    graceful_stop.wait(1)

    while not graceful_stop.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable,
                             hostname=hostname,
                             pid=pid,
                             thread=current_thread)
            prefix = 'judge-cleaner[%i/%i] ' % (heartbeat['assign_thread'],
                                                heartbeat['nr_threads'])
            logger = formatted_logger(logging.log, prefix + '%s')

            start = time.time()

            # Refresh paused rules
            iter_paused_rules = deepcopy(paused_rules)
            for key in iter_paused_rules:
                if datetime.utcnow() > paused_rules[key]:
                    del paused_rules[key]

            rules = get_expired_rules(
                total_workers=heartbeat['nr_threads'],
                worker_number=heartbeat['assign_thread'],
                limit=200,
                blocked_rules=[key for key in paused_rules])
            logger(
                logging.DEBUG, 'index query time %f fetch size is %d' %
                (time.time() - start, len(rules)))

            if not rules and not once:
                logger(
                    logging.DEBUG, 'did not get any work (paused_rules=%s)' %
                    str(len(paused_rules)))
                graceful_stop.wait(60)
            else:
                for rule in rules:
                    rule_id = rule[0]
                    rule_expression = rule[1]
                    logger(
                        logging.INFO, 'Deleting rule %s with expression %s' %
                        (rule_id, rule_expression))
                    if graceful_stop.is_set():
                        break
                    try:
                        start = time.time()
                        delete_rule(rule_id=rule_id, nowait=True)
                        logger(
                            logging.DEBUG, 'deletion of %s took %f' %
                            (rule_id, time.time() - start))
                    except (DatabaseException, DatabaseError,
                            UnsupportedOperation) as e:
                        if match('.*ORA-00054.*', str(e.args[0])):
                            paused_rules[rule_id] = datetime.utcnow(
                            ) + timedelta(seconds=randint(600, 2400))
                            record_counter(
                                'rule.judge.exceptions.LocksDetected')
                            logger(logging.WARNING,
                                   'Locks detected for %s' % rule_id)
                        elif match('.*QueuePool.*', str(e.args[0])):
                            logger(logging.WARNING,
                                   'DatabaseException',
                                   exc_info=True)
                            record_counter('rule.judge.exceptions.%s' %
                                           e.__class__.__name__)
                        elif match('.*ORA-03135.*', str(e.args[0])):
                            logger(logging.WARNING,
                                   'DatabaseException',
                                   exc_info=True)
                            record_counter('rule.judge.exceptions.%s' %
                                           e.__class__.__name__)
                        else:
                            logger(logging.ERROR,
                                   'DatabaseException',
                                   exc_info=True)
                            record_counter('rule.judge.exceptions.%s' %
                                           e.__class__.__name__)
                    except RuleNotFound:
                        pass
        except (DatabaseException, DatabaseError) as e:
            if match('.*QueuePool.*', str(e.args[0])):
                logger(logging.WARNING, 'DatabaseException', exc_info=True)
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
            elif match('.*ORA-03135.*', str(e.args[0])):
                logger(logging.WARNING, 'DatabaseException', exc_info=True)
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
            else:
                logger(logging.CRITICAL, 'DatabaseException', exc_info=True)
                record_counter('rule.judge.exceptions.%s' %
                               e.__class__.__name__)
        except Exception as e:
            logger(logging.CRITICAL, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
        if once:
            break

    die(executable=executable,
        hostname=hostname,
        pid=pid,
        thread=current_thread)
Exemplo n.º 18
0
 def __cleanup_locks_and_rules(self, session=None):
     query = session.query(models.ReplicationRule.id). \
         join(models.ReplicaLock, models.ReplicationRule.id == models.ReplicaLock.rule_id). \
         filter(models.ReplicaLock.rse_id.in_(self.created_rses)).distinct()
     for rule_id, in query:
         rule_core.delete_rule(rule_id, session=session)
Exemplo n.º 19
0
def rule_cleaner(once=False, process=0, total_processes=1, thread=0, threads_per_process=1):
    """
    Main loop to check for expired replication rules
    """

    logging.info('rule_cleaner: starting')

    logging.info('rule_cleaner: started')

    paused_rules = {}  # {rule_id: datetime}

    while not graceful_stop.is_set():
        try:
            start = time.time()
            rules = get_expired_rules(total_workers=total_processes*threads_per_process-1,
                                      worker_number=process*threads_per_process+thread,
                                      limit=1000)
            logging.debug('rule_cleaner index query time %f fetch size is %d' % (time.time() - start, len(rules)))

            # Refresh paused rules
            iter_paused_rules = deepcopy(paused_rules)
            for key in iter_paused_rules:
                if datetime.utcnow() > paused_rules[key]:
                    del paused_rules[key]

            # Remove paused rules from result set
            rules = [rule for rule in rules if rule[0] not in paused_rules]

            if not rules and not once:
                logging.info('rule_cleaner[%s/%s] did not get any work' % (process*threads_per_process+thread, total_processes*threads_per_process-1))
                time.sleep(10)
            else:
                record_gauge('rule.judge.cleaner.threads.%d' % (process*threads_per_process+thread), 1)
                for rule in rules:
                    rule_id = rule[0]
                    rule_expression = rule[1]
                    logging.info('rule_cleaner[%s/%s]: Deleting rule %s with expression %s' % (process*threads_per_process+thread, total_processes*threads_per_process-1, rule_id, rule_expression))
                    if graceful_stop.is_set():
                        break
                    try:
                        start = time.time()
                        delete_rule(rule_id=rule_id, nowait=True)
                        logging.debug('rule_cleaner[%s/%s]: deletion of %s took %f' % (process*threads_per_process+thread, total_processes*threads_per_process-1, rule_id, time.time() - start))
                    except (DatabaseException, DatabaseError, AccessDenied), e:
                        if isinstance(e.args[0], tuple):
                            if match('.*ORA-00054.*', e.args[0][0]):
                                paused_rules[rule_id] = datetime.utcnow() + timedelta(seconds=randint(60, 600))
                                record_counter('rule.judge.exceptions.LocksDetected')
                                logging.warning('rule_cleaner[%s/%s]: Locks detected for %s' % (process*threads_per_process+thread, total_processes*threads_per_process-1, rule_id))
                            else:
                                logging.error(traceback.format_exc())
                                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        else:
                            logging.error(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                record_gauge('rule.judge.cleaner.threads.%d' % (process*threads_per_process+thread), 0)
        except Exception, e:
            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
            record_gauge('rule.judge.cleaner.threads.%d' % (process*threads_per_process+thread), 0)
            logging.critical(traceback.format_exc())
        if once:
            return
Exemplo n.º 20
0
 def __cleanup_locks_and_rules(self, rules_to_remove, session=None):
     for rule_id, in rules_to_remove:
         rule_core.delete_rule(rule_id, session=session)
Exemplo n.º 21
0
def run_once(paused_rules, heartbeat_handler, **_kwargs):
    worker_number, total_workers, logger = heartbeat_handler.live()

    try:
        start = time.time()

        # Refresh paused rules
        iter_paused_rules = deepcopy(paused_rules)
        for key in iter_paused_rules:
            if datetime.utcnow() > paused_rules[key]:
                del paused_rules[key]

        rules = get_expired_rules(total_workers=total_workers,
                                  worker_number=worker_number,
                                  limit=200,
                                  blocked_rules=[key for key in paused_rules])
        logger(
            logging.DEBUG, 'index query time %f fetch size is %d' %
            (time.time() - start, len(rules)))

        if not rules:
            logger(
                logging.DEBUG, 'did not get any work (paused_rules=%s)' %
                str(len(paused_rules)))
            return

        for rule in rules:
            _, _, logger = heartbeat_handler.live()
            rule_id = rule[0]
            rule_expression = rule[1]
            logger(
                logging.INFO, 'Deleting rule %s with expression %s' %
                (rule_id, rule_expression))
            if graceful_stop.is_set():
                break
            try:
                start = time.time()
                delete_rule(rule_id=rule_id, nowait=True)
                logger(
                    logging.DEBUG,
                    'deletion of %s took %f' % (rule_id, time.time() - start))
            except (DatabaseException, DatabaseError,
                    UnsupportedOperation) as e:
                if match('.*ORA-00054.*', str(e.args[0])):
                    paused_rules[rule_id] = datetime.utcnow() + timedelta(
                        seconds=randint(600, 2400))
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': 'LocksDetected'})
                    logger(logging.WARNING, 'Locks detected for %s' % rule_id)
                elif match('.*QueuePool.*', str(e.args[0])):
                    logger(logging.WARNING, 'DatabaseException', exc_info=True)
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': e.__class__.__name__})
                elif match('.*ORA-03135.*', str(e.args[0])):
                    logger(logging.WARNING, 'DatabaseException', exc_info=True)
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': e.__class__.__name__})
                else:
                    logger(logging.ERROR, 'DatabaseException', exc_info=True)
                    record_counter('rule.judge.exceptions.{exception}',
                                   labels={'exception': e.__class__.__name__})
            except RuleNotFound:
                pass
    except (DatabaseException, DatabaseError) as e:
        if match('.*QueuePool.*', str(e.args[0])):
            logger(logging.WARNING, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.{exception}',
                           labels={'exception': e.__class__.__name__})
        elif match('.*ORA-03135.*', str(e.args[0])):
            logger(logging.WARNING, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.{exception}',
                           labels={'exception': e.__class__.__name__})
        else:
            logger(logging.CRITICAL, 'DatabaseException', exc_info=True)
            record_counter('rule.judge.exceptions.{exception}',
                           labels={'exception': e.__class__.__name__})
    except Exception as e:
        logger(logging.CRITICAL, 'DatabaseException', exc_info=True)
        record_counter('rule.judge.exceptions.{exception}',
                       labels={'exception': e.__class__.__name__})