Exemplo n.º 1
0
class TestIssueCache(unittest.TestCase):
    """Tests for the mapping-style interface and validation of IssueCache."""

    def setUp(self):
        """Create the cache under test; its first path is removed again
        in ``tearDown``."""
        self.cache = IssueCache('tests/fixtures/tmp', 'tests/fixtures/images')

    def tearDown(self):
        """Remove the directory passed as first argument to the cache."""
        shutil.rmtree('tests/fixtures/tmp')

    def test_cache(self):
        """Store, read back, iterate over and delete cache entries."""
        self.cache['url1'] = [{'images': [{'path': 'width-large.jpg'}]}]
        self.cache['url2'] = [
            {'images': [{'path': 'width-large.jpg'}]},
            {'images': [{'path': 'width-small.jpg'}]},
            # Represent a missing image
            {'images': []}
        ]
        self.cache['url3'] = [{'images': [{'path': 'width-small.jpg'}]}]
        self.assertEqual(len(self.cache), 3)

        # Element [0] of a cached entry is the list that was stored.
        self.assertEqual(self.cache['url1'][0],
                         [{'images': [{'path': 'width-large.jpg'}]}])

        for key in self.cache:
            self.assertIn(key, ('url1', 'url2', 'url3'))
            v = self.cache[key]
            if key in ('url1', 'url3'):
                self.assertEqual(len(v[0]), 1)
            else:
                self.assertEqual(len(v[0]), 3)
        del self.cache['url1']
        self.assertEqual(len(self.cache), 2)
        self.assertNotIn('url1', self.cache)
        self.assertIn('url2', self.cache)
        self.assertIn('url3', self.cache)

    def test_is_valid(self):
        """Check ``is_valid`` for a usable entry, an entry whose image file
        was removed, and a key that was never stored."""
        self.cache['url1'] = [
            {'images': [{'path': 'width-large.jpg'}]},
            {'images': []}
        ]
        image_path = 'tests/fixtures/images/missing-image.jpg'
        # Create a temporary image
        open(image_path, 'a').close()
        try:
            self.cache['url2'] = [{'images': [{'path': 'missing-image.jpg'}]}]
        finally:
            # Remove the temporary image even if storing the entry failed,
            # so that no stray file is left in the fixtures directory.
            os.unlink(image_path)
        self.assertTrue(self.cache.is_valid('url1'))
        self.assertFalse(self.cache.is_valid('url2'))
        self.assertFalse(self.cache.is_valid('url3'))
Exemplo n.º 2
0
def _create_mobi(issue, result=None):
    """RQ job that builds the MOBI document of a single issue.

    The issue is looked up by ``issue.url`` in the issue cache.  If it is
    absent, or present but reported as not valid, an error is logged and
    ``result`` (when given) is set to ``Result.FAILED``; a not-valid entry
    is additionally deleted from the cache.  Otherwise the cached images
    are handed to ``MobiCtl`` and the MOBI is created.
    """
    store = IssueCache(settings.ISSUES_STORE, settings.IMAGES_STORE)

    # Decide which failure (if any) applies before acting on it.
    failure = None
    drop_entry = False
    if issue.url not in store:
        failure = 'Issue not found in issue cache (%s)'
    elif not store.is_valid(issue.url):
        failure = 'Issue in issue cache is not valid (%s)'
        drop_entry = True

    if failure is None:
        # The second element of the cached entry is not needed here.
        images, _ = store[issue.url]
        MobiCtl(issue, images, settings.IMAGES_STORE).create_mobi()
        return

    logger.error(failure % issue)
    if result:
        result.set_status(Result.FAILED)
    if drop_entry:
        del store[issue.url]
Exemplo n.º 3
0
    def collect(self, spider):
        """Copy every ``url -> images`` pair of ``self.items`` into the
        issue cache; log an error instead when there are no items."""
        collected = self.items
        if collected:
            store = IssueCache(self.issues_store, self.images_store)
            for issue_url in collected:
                store[issue_url] = collected[issue_url]
            return

        # Empty items are reported as an error: there is probably a
        # hidden bug in the spider.
        logger.error('Items are empty, please check [%s]' % spider)
Exemplo n.º 4
0
 def scrape(self, issues, dry_run=False):
     """Create and run one crawler for each issue whose URL is not yet
     in the issue cache."""
     known = IssueCache(settings.ISSUES_STORE, settings.IMAGES_STORE)
     pending = []
     for issue in issues:
         # Already cached issues do not need to be scraped again.
         if issue.url in known:
             continue
         source_name = issue.manga.source.name.lower()
         crawler = self._create_crawler(source_name, issue.manga.name,
                                        issue.number, issue.url, dry_run)
         pending.append(crawler)
     ProcessControl(pending, self.process).run()
Exemplo n.º 5
0
    def handle(self, *args, **options):
        """Run the clean-up selected by ``options['command']``.

        Values read from ``options``: ``command``, the action flags
        ``force``, ``list`` and ``remove``, the age limits ``days`` and
        ``hours``, plus ``spiders`` and ``loglevel``.

        Raises:
            CommandError: when no action flag is set, when neither ``days``
                nor ``hours`` is provided for a command other than
                ``cover``, or when ``command`` is not a known value.
        """
        command = options['command']

        actions = ('force', 'list', 'remove')
        if not any(options[i] for i in actions):
            msg = 'Please, provide one action: %s' % '|'.join(actions)
            raise CommandError(msg)

        days = options['days']
        extra_hours = options['hours']
        if days is None and extra_hours is None:
            # `cover` command does not use the `days` nor `hours` parameter
            if command not in ('cover',):
                msg = 'Provide some days/hours to find old objects.'
                raise CommandError(msg)
            hours = None
        else:
            # Always bind `hours` once a value was provided, including an
            # explicit zero; otherwise the name would be undefined when
            # the command branches below use it.
            hours = 24 * int(days if days else 0)
            hours += int(extra_hours if extra_hours else 0)

        sources = self._get_sources(options['spiders'])
        remove = options['remove']
        list_ = options['list']
        force = options['force']
        # Only listing is performed unless `force` was requested.
        list_ = list_ or not force

        loglevel = options['loglevel']
        logger.setLevel(loglevel)

        if command == 'manga':
            self._clean_manga(hours, sources, list_)
        elif command == 'user':
            self._clean_user(hours, remove, list_)
        elif command == 'image-cache':
            cache = os.path.join(settings.IMAGES_STORE, 'full')
            self._clean_image_cache(hours, cache, list_)
        elif command == 'mobi-cache':
            cache = MobiCache(settings.MOBI_STORE)
            self._clean_cache(hours, cache, list_)
        elif command == 'issue-cache':
            cache = IssueCache(settings.ISSUES_STORE, settings.IMAGES_STORE)
            self._clean_cache(hours, cache, list_)
            mobi_cache = MobiCache(settings.MOBI_STORE)
            self._clean_broken_issue_cache(cache, mobi_cache, list_)
        elif command == 'cover':
            self._clean_cover(sources, list_)
        elif command == 'result-processing':
            self._clean_result(hours, Result.PROCESSING, list_)
        elif command == 'result-failed':
            self._clean_result(hours, Result.FAILED, list_)
        else:
            raise CommandError('Not valid command value.')
Exemplo n.º 6
0
 def setUp(self):
     """Create the IssueCache instance used by the tests."""
     issues_store = 'tests/fixtures/tmp'
     images_store = 'tests/fixtures/images'
     self.cache = IssueCache(issues_store, images_store)