Пример #1
0
    def process_item(self, item, spider):
        """Pass ``item`` through unless ``JobItem`` reports it as too old.

        NOTE(review): the signature and the ``DropItem`` exception look like a
        Scrapy item-pipeline hook -- confirm against the enclosing class.

        Args:
            item: The job item to check.
            spider: The spider that produced the item; not used here.

        Returns:
            The unchanged ``item`` when ``JobItem.is_older_required(item)``
            is falsy.

        Raises:
            DropItem: When ``JobItem.is_older_required(item)`` is truthy. The
                message includes ``config.HOUSEKEEPING_RECORD_ORDLER_THAN``.
        """
        if not JobItem.is_older_required(item):
            return item

        # str() keeps the %-format safe even if the configured value is a tuple.
        max_age_days = str(config.HOUSEKEEPING_RECORD_ORDLER_THAN)
        raise DropItem(
            'Job is published older than %s days. Removing...' % max_age_days)
Пример #2
0
    def should_load_details(self, job_item):
        """Decide whether the details for ``job_item`` should be loaded.

        The checks run in a fixed order and stop at the first one that
        matches: ``JobItem.is_exists``, ``JobItem.is_older_required``,
        ``BlockedContact.is_contact_blocked`` (on ``job_item.contact``) and
        ``RejectionPattern.should_be_rejected`` (on ``job_item.job_title``).
        A match is logged at INFO level, prefixed with ``self.name``.

        Args:
            job_item: The job whose ``job_title`` and ``contact`` are checked.

        Returns:
            ``False`` if any check matches, ``True`` otherwise.
        """
        skip_message = None

        # Only the first matching branch builds a message; later checks are
        # not evaluated once an earlier one has matched.
        if JobItem.is_exists(job_item):
            skip_message = (
                '[%s] skipping loading details as job already exists. job_title: %s'
                % (self.name, job_item.job_title))
        elif JobItem.is_older_required(job_item):
            skip_message = (
                '[%s] skipping loading details as job is older than %s days. job_title: %s'
                % (self.name, str(config.HOUSEKEEPING_RECORD_ORDLER_THAN),
                   job_item.job_title))
        elif BlockedContact.is_contact_blocked(job_item.contact):
            skip_message = (
                '[%s] skipping loading details as job contact is blocked. contact: %s'
                % (self.name, job_item.contact))
        elif RejectionPattern.should_be_rejected(job_item.job_title):
            skip_message = (
                '[%s] skipping loading details as job matches rejection pattern. job_title: %s'
                % (self.name, job_item.job_title))

        if skip_message is None:
            return True

        logger.info(skip_message)
        return False
Пример #3
0
    def process_item(self, item, spider):
        """Return ``item`` unchanged, or drop it when it is reported as too old.

        NOTE(review): this has the shape of a Scrapy item-pipeline
        ``process_item`` hook -- confirm against the enclosing class.

        Args:
            item: The job item to check.
            spider: The spider that produced the item; not used here.

        Returns:
            ``item`` itself when ``JobItem.is_older_required(item)`` is falsy.

        Raises:
            DropItem: When ``JobItem.is_older_required(item)`` is truthy; the
                message reports ``config.HOUSEKEEPING_RECORD_ORDLER_THAN``.
        """
        if JobItem.is_older_required(item):
            # str() guards the single-argument %-format against tuple values.
            threshold = str(config.HOUSEKEEPING_RECORD_ORDLER_THAN)
            raise DropItem(
                'Job is published older than %s days. Removing...' % threshold)

        return item
Пример #4
0
    def should_load_details(self, job_item):
        """Return whether the details for ``job_item`` should be loaded.

        Four guards run in order; the first one that matches logs an INFO
        message prefixed with ``self.name`` and makes the method return
        ``False``. If none matches, the method returns ``True``.

        Args:
            job_item: The job whose ``job_title`` and ``contact`` are checked.

        Returns:
            ``True`` when no guard matches, ``False`` otherwise.
        """
        # Guard: JobItem reports the job as already existing.
        if JobItem.is_exists(job_item):
            message = (
                '[%s] skipping loading details as job already exists. job_title: %s'
                % (self.name, job_item.job_title)
            )
            logger.info(message)
            return False

        # Guard: JobItem reports the job as older than the configured limit.
        if JobItem.is_older_required(job_item):
            message = (
                '[%s] skipping loading details as job is older than %s days. job_title: %s'
                % (
                    self.name,
                    str(config.HOUSEKEEPING_RECORD_ORDLER_THAN),
                    job_item.job_title,
                )
            )
            logger.info(message)
            return False

        # Guard: the job's contact is reported as blocked.
        if BlockedContact.is_contact_blocked(job_item.contact):
            message = (
                '[%s] skipping loading details as job contact is blocked. contact: %s'
                % (self.name, job_item.contact)
            )
            logger.info(message)
            return False

        # Guard: the job title is reported as matching a rejection pattern.
        if RejectionPattern.should_be_rejected(job_item.job_title):
            message = (
                '[%s] skipping loading details as job matches rejection pattern. job_title: %s'
                % (self.name, job_item.job_title)
            )
            logger.info(message)
            return False

        return True