def save(self, *args, **kwargs):
    """Save the crawl, preparing its on-disk resources on the first save.

    When ``self.pk`` is ``None`` the record is saved once to obtain a
    primary key; then the crawl directory is ensured, the seeds file is
    moved from its temporary location into that directory, and the
    default config tree is copied to this crawl's config path. The record
    is then saved a second time so the updated ``seeds_list`` and
    ``config`` values are stored.

    ``*args`` and ``**kwargs`` are forwarded to the parent ``save``. After
    the initial save, a ``force_insert`` request is cleared so that the
    follow-up save does not try to insert the same row again.
    """
    # The name does not change during this method, so derive the slug once.
    self.slug = slugify(unicode(self.name))

    if self.pk is None:
        self.location = os.path.join(self.project.slug, "crawls", self.slug)

        # Need to save first to obtain the pk attribute.
        super(Crawl, self).save(*args, **kwargs)

        # The row exists now. Repeating a forced INSERT in the final save
        # below (e.g. when called through ``objects.create()``) would
        # collide with the row just written, so drop the flag.
        kwargs.pop('force_insert', None)
        if args:
            # ``force_insert`` is the first positional parameter of
            # Django's ``Model.save``.
            args = (False,) + tuple(args[1:])

        # Ensure that the crawl path `resources/crawls/<crawl.pk>` exists
        crawl_path = self.ensure_crawl_path()

        # Move the file from temporary directory to crawl directory,
        # and update the FileField accordingly:
        # https://code.djangoproject.com/ticket/15590#comment:10
        if self.crawler == 'nutch':
            # Nutch requires a seed directory, not a seed file
            seed_dir = join(crawl_path, 'seeds')
            ensure_exists(seed_dir)
            dst = join(crawl_path, 'seeds/seeds')
            shutil.move(self.seeds_list.path, dst)
            self.seeds_list.name = seed_dir
        else:
            dst = join(crawl_path, 'seeds')
            shutil.move(self.seeds_list.path, dst)
            self.seeds_list.name = dst

        # Create unique configs for every ache crawl.
        # NOTE(review): the comment mentions ache only, but this step is
        # not guarded by a crawler check here -- confirm intended scope.
        config_path = self.get_config_path()
        if os.path.exists(config_path):
            # Replace any stale copy; copytree needs a fresh destination.
            shutil.rmtree(config_path)
        shutil.copytree(self.get_default_config(), config_path)
        self.config = config_path

    # Continue saving as normal
    super(Crawl, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Save the crawl, preparing its on-disk resources on the first save.

    When ``self.pk`` is ``None`` the record is saved once to obtain a
    primary key; then the crawl directory is ensured, the seeds file is
    moved from its temporary location into that directory, and the
    default config tree is copied to this crawl's config path. The record
    is then saved a second time so the updated ``seeds_list`` and
    ``config`` values are stored.

    ``*args`` and ``**kwargs`` are forwarded to the parent ``save``. After
    the initial save, a ``force_insert`` request is cleared so that the
    follow-up save does not try to insert the same row again.
    """
    # The name does not change during this method, so derive the slug once.
    self.slug = slugify(unicode(self.name))

    if self.pk is None:
        self.location = os.path.join(self.project.slug, self.slug)

        # Need to save first to obtain the pk attribute.
        super(Crawl, self).save(*args, **kwargs)

        # The row exists now. Repeating a forced INSERT in the final save
        # below (e.g. when called through ``objects.create()``) would
        # collide with the row just written, so drop the flag.
        kwargs.pop('force_insert', None)
        if args:
            # ``force_insert`` is the first positional parameter of
            # Django's ``Model.save``.
            args = (False,) + tuple(args[1:])

        # Ensure that the crawl path `resources/crawls/<crawl.pk>` exists
        crawl_path = self.ensure_crawl_path()

        # Move the file from temporary directory to crawl directory,
        # and update the FileField accordingly:
        # https://code.djangoproject.com/ticket/15590#comment:10
        if self.crawler == 'nutch':
            # Nutch requires a seed directory, not a seed file
            seed_dir = join(crawl_path, 'seeds')
            ensure_exists(seed_dir)
            dst = join(crawl_path, 'seeds/seeds')
            shutil.move(self.seeds_list.path, dst)
            self.seeds_list.name = seed_dir
        else:
            dst = join(crawl_path, 'seeds')
            shutil.move(self.seeds_list.path, dst)
            self.seeds_list.name = dst

        # Create unique configs for every ache crawl.
        # NOTE(review): the comment mentions ache only, but this step is
        # not guarded by a crawler check here -- confirm intended scope.
        config_path = self.get_config_path()
        if os.path.exists(config_path):
            # Replace any stale copy; copytree needs a fresh destination.
            shutil.rmtree(config_path)
        shutil.copytree(self.get_default_config(), config_path)
        self.config = config_path

    # Continue saving as normal
    super(Crawl, self).save(*args, **kwargs)
def ensure_model_path(self):
    """Return the model path after passing it through ``ensure_exists``.

    The path is obtained from ``self.get_model_path()``. ``ensure_exists``
    is a helper defined outside this block; presumably it creates the
    directory when it is missing -- confirm against its definition.
    """
    target = self.get_model_path()
    ensure_exists(target)
    return target
def ensure_crawl_path(self):
    """Return the crawl path after passing it through ``ensure_exists``.

    The path is obtained from ``self.get_crawl_path()``. ``ensure_exists``
    is a helper defined outside this block; presumably it creates the
    directory when it is missing -- confirm against its definition.
    """
    target = self.get_crawl_path()
    ensure_exists(target)
    return target
def ensure_crawl_path(self):
    """Return ``self.crawl_location`` after passing it to ``ensure_exists``.

    ``ensure_exists`` is a helper defined outside this block; presumably
    it creates the directory when it is missing -- confirm against its
    definition.
    """
    target = self.crawl_location
    ensure_exists(target)
    return target