示例#1
0
    def save(self, *args, **kwargs):
        """Save the crawl, relocating its seeds file on the first save.

        On the first save (``self.pk is None``) the row is written once so
        the instance obtains a primary key, the seeds file is moved into
        the crawl directory, and the model is then saved a second time to
        persist the updated ``seeds_list`` (and, for non-Nutch crawlers,
        ``config``) values.  On later saves only the slug is refreshed
        before delegating to the parent ``save``.

        Args:
            *args: Forwarded unchanged to the parent ``save``.
            **kwargs: Forwarded to the parent ``save``.  ``force_insert``
                is dropped after the initial insert so the follow-up save
                performs an update.
        """
        if self.pk is None:
            # Need to save first to obtain the pk attribute.
            self.slug = slugify(unicode(self.name))
            self.location = os.path.join(self.project.slug, "crawls", self.slug)
            super(Crawl, self).save(*args, **kwargs)

            # The row exists now, so the final save below must be an
            # UPDATE.  Callers such as ``Manager.create()`` pass
            # ``force_insert=True``; forwarding it a second time would try
            # to INSERT the same pk again and raise IntegrityError.
            # NOTE(review): only the keyword form is handled here.
            kwargs.pop('force_insert', None)

            # Make sure the crawl directory exists before moving files in.
            crawl_path = self.ensure_crawl_path()

            # Move the file from temporary directory to crawl directory,
            #   and update the FileField accordingly:
            #   https://code.djangoproject.com/ticket/15590#comment:10

            # Nutch requires a seed directory, not a seed file
            if self.crawler == 'nutch':
                seed_dir = join(crawl_path, 'seeds')
                ensure_exists(seed_dir)
                dst = join(crawl_path, 'seeds/seeds')
                shutil.move(self.seeds_list.path, dst)
                # The field points at the directory, not the file inside it.
                self.seeds_list.name = seed_dir
            else:
                dst = join(crawl_path, 'seeds')
                shutil.move(self.seeds_list.path, dst)
                self.seeds_list.name = dst
                # Create unique configs for every ache crawl.
                config_path = self.get_config_path()
                # copytree requires that the destination does not exist
                # yet, so clear out any stale copy first.
                if os.path.exists(config_path):
                    shutil.rmtree(config_path)
                shutil.copytree(self.get_default_config(), config_path)
                self.config = config_path

        # Continue saving as normal
        self.slug = slugify(unicode(self.name))
        super(Crawl, self).save(*args, **kwargs)
示例#2
0
    def save(self, *args, **kwargs):
        """Persist the crawl; on first save also move the seeds file.

        When the instance has no primary key yet, the method saves once to
        obtain one, moves the seeds file from its current location into
        the crawl directory, updates ``seeds_list`` (and ``config`` for
        non-Nutch crawlers), and saves again so those values are stored.
        For an already-saved instance it just refreshes the slug and
        delegates to the parent ``save``.

        Args:
            *args: Passed through to the parent ``save``.
            **kwargs: Passed through to the parent ``save``; a
                ``force_insert`` entry is removed once the initial insert
                has happened.
        """
        if self.pk is None:
            # Need to save first to obtain the pk attribute.
            self.slug = slugify(unicode(self.name))
            self.location = os.path.join(self.project.slug, self.slug)
            super(Crawl, self).save(*args, **kwargs)

            # After the insert above, the second save must update the
            # existing row.  ``Manager.create()`` calls ``save`` with
            # ``force_insert=True``; reusing that flag would attempt a
            # duplicate INSERT and fail with IntegrityError.
            # NOTE(review): only the keyword form is handled here.
            kwargs.pop('force_insert', None)

            # Make sure the crawl directory exists before moving files in.
            crawl_path = self.ensure_crawl_path()

            # Move the file from temporary directory to crawl directory,
            #   and update the FileField accordingly:
            #   https://code.djangoproject.com/ticket/15590#comment:10

            # Nutch requires a seed directory, not a seed file
            if self.crawler == 'nutch':
                seed_dir = join(crawl_path, 'seeds')
                ensure_exists(seed_dir)
                dst = join(crawl_path, 'seeds/seeds')
                shutil.move(self.seeds_list.path, dst)
                # Record the directory (not the moved file) on the field.
                self.seeds_list.name = seed_dir
            else:
                dst = join(crawl_path, 'seeds')
                shutil.move(self.seeds_list.path, dst)
                self.seeds_list.name = dst
                # Create unique configs for every ache crawl.
                config_path = self.get_config_path()
                # Remove a pre-existing config directory, since copytree
                # refuses to copy onto an existing destination.
                if os.path.exists(config_path):
                    shutil.rmtree(config_path)
                shutil.copytree(self.get_default_config(), config_path)
                self.config = config_path

        # Continue saving as normal
        self.slug = slugify(unicode(self.name))
        super(Crawl, self).save(*args, **kwargs)
示例#3
0
    def ensure_model_path(self):
        """Return the model path after passing it through ``ensure_exists``.

        The path comes from ``self.get_model_path()``; ``ensure_exists``
        presumably creates the directory when it is missing.
        """
        path = self.get_model_path()
        ensure_exists(path)
        return path
示例#4
0
 def ensure_crawl_path(self):
     """Return the crawl path after passing it through ``ensure_exists``.

     The path comes from ``self.get_crawl_path()``; ``ensure_exists``
     presumably creates the directory when it is missing.
     """
     path = self.get_crawl_path()
     ensure_exists(path)
     return path
示例#5
0
 def ensure_model_path(self):
     """Look up the model path, run ``ensure_exists`` on it, and return it.

     ``ensure_exists`` is defined elsewhere; it presumably creates the
     directory if it is not already present.
     """
     target = self.get_model_path()
     ensure_exists(target)
     return target
示例#6
0
 def ensure_crawl_path(self):
     """Look up the crawl path, run ``ensure_exists`` on it, and return it.

     ``ensure_exists`` is defined elsewhere; it presumably creates the
     directory if it is not already present.
     """
     target = self.get_crawl_path()
     ensure_exists(target)
     return target
示例#7
0
 def ensure_crawl_path(self):
     """Return ``self.crawl_location`` after running ``ensure_exists`` on it.

     ``ensure_exists`` presumably creates the directory when it is
     missing.
     """
     location = self.crawl_location
     ensure_exists(location)
     return location
示例#8
0
 def ensure_crawl_path(self):
     """Run ``ensure_exists`` on the crawl location and hand it back.

     The location is read once from ``self.crawl_location``;
     ``ensure_exists`` presumably creates the directory if absent.
     """
     path = self.crawl_location
     ensure_exists(path)
     return path