def update_catalog(self, item, spider):
    """Update the catalog (list of mangas and issues).

    Resolve the `Source` from the lowercased spider name, fetch the
    `Manga` matching the item URL for that source (or build a new,
    unsaved one), copy the catalog-only ranking values onto it and
    delegate the rest of the work to `update_collection`.
    """
    # A catalog is a list of manga (collection) updates.  A manga item
    # coming from a catalog update can carry more information than one
    # coming from a collection update; for now only the ranking values
    # are present in the catalog and absent from the collection.
    #
    # Removal of collections is handled elsewhere: only one item is
    # received at a time here, so it is not possible to notice which
    # items are no longer in the database.  The `updated` field can be
    # used for that purpose (only for collections, which are always
    # updated).
    source = Source.objects.get(spider=spider.name.lower())

    lookup = {'url': item['url'], 'source': source}
    try:
        manga = Manga.objects.get(**lookup)
    except Manga.DoesNotExist:
        manga = Manga(**lookup)

    # Ranking values are set here; `update_collection` receives the
    # already prepared instance.
    for field in ('rank', 'rank_order'):
        setattr(manga, field, item[field])

    self.update_collection(item, spider, manga=manga)
def update_collection(self, item, spider, manga=None):
    """Update a collection of issues (a manga).

    Args:
        item: Mapping-like scraped item.  'alt_name', 'genres',
            'images' and 'issues' are always read; 'image_urls' is
            read when 'images' is non-empty; 'url' is used for the
            lookup when `manga` is not given.  Every key other than
            'rank', 'rank_order' and the relation keys is handed to
            `self._sic` together with the manga.
        spider: Object whose lowercased `name` selects the `Source`.
        manga: Optional `Manga` instance to update.  When it is None
            the manga is looked up by (url, source); if none exists a
            new unsaved instance is built.
    """
    spider_name = spider.name.lower()
    source = Source.objects.get(spider=spider_name)

    # Explicit None check: only a missing argument triggers the lookup.
    if manga is None:
        try:
            manga = Manga.objects.get(url=item['url'], source=source)
        except Manga.DoesNotExist:
            manga = Manga(url=item['url'], source=source)

    # 'rank' and 'rank_order' are skipped here, the remaining excluded
    # keys are relations that are synchronized further below.
    ignore_fields = ('rank', 'rank_order')
    exceptions = ('alt_name', 'genres', 'image_urls', 'images', 'issues')
    skipped = ignore_fields + exceptions
    fields = [f for f in item if f not in skipped]

    # Update the fields of the manga object
    for f in fields:
        self._sic(manga, item, f)

    # Save the object to have a PK (creation of relations).  Also
    # update the `modified` field to signal that the Manga is still
    # there (shares semantics with `last_seen`).
    manga.save()

    # alt_name
    alt_names = [{'name': i} for i in item['alt_name']]
    self._update_relation(manga, 'altname_set', 'name', alt_names,
                          self._update_name)

    # genres
    genres = [{'name': i} for i in item['genres']]
    self._update_relation(manga, 'genres', 'name', genres,
                          self._update_name,
                          m2m=source.genre_set.all())

    # cover
    if item['images']:
        path = urlparse.urlparse(item['image_urls'][0]).path
        name = os.path.basename(path)
        image_path = os.path.join(self.images_store,
                                  item['images'][0]['path'])
        # Update the cover always, so if we remove the image in
        # the MEDIA directory, this will be recreated.
        manga.cover.delete(save=False)
        # Open the downloaded image in a context manager so the file
        # handle is closed as soon as the cover has been stored,
        # instead of leaking one descriptor per processed item.
        with open(image_path, 'rb') as image_file:
            manga.cover.save(name, File(image_file))
    elif manga.cover:
        manga.cover.delete()

    # issues
    self._update_relation(manga, 'issue_set', 'url', item['issues'],
                          self._update_issue)
def full_scan(request):
    """Run a full scan and store every series that is not yet known.

    Writes a log entry when the scan starts, retrieves all series via
    `get_all_series()`, saves a new `Manga` for every (name, url) pair
    that has no matching row, then writes a closing log entry.

    :param request: Django request object
    :return: `HttpResponse` confirming that the scan was executed
    """
    log_basic_entry(request, 'Full Scan started')

    for entry in get_all_series():
        # Each entry is indexed positionally: [0] is the series name,
        # [1] is the manga reader URL.
        identity = {
            'series_name': entry[0],
            'manga_reader_url': entry[1],
        }
        try:
            Manga.objects.get(**identity)
        except Manga.DoesNotExist:
            # Not in the database yet: insert it.
            Manga(**identity).save()

    log_basic_entry(request, 'Full Scan finished successfully')
    return HttpResponse('Full scan executed.')
def update_collection(self, item, spider):
    """Update a collection of issues (a manga).

    Resolve the `Source` from the lowercased spider name, fetch or
    build the `Manga` for the item URL, copy the populated plain
    fields onto it, save it, and then synchronize alternative names,
    genres, cover image and issues.
    """
    source = Source.objects.get(spider=spider.name.lower())

    lookup = {'url': item['url'], 'source': source}
    try:
        manga = Manga.objects.get(**lookup)
    except Manga.DoesNotExist:
        manga = Manga(**lookup)

    # These keys describe relations; they are synchronized after the
    # manga has been saved.
    relations = ('alt_name', 'genres', 'image_urls', 'images', 'issues')

    # Some fields are only available when the entry point is
    # `update_catalog` and not `update_collection`.  For example, some
    # sources expose `rank` only in the catalog view and not in the
    # manga register.  In that case the stored value must not be
    # overwritten, nor replaced by a default, just because the item
    # lacks it.
    #
    # Hence only the keys actually present in `item` are processed,
    # and detecting missing required values is delegated to `clean`.
    populated = [key for key in item if key not in relations]
    for key in populated:
        self._sic(manga, item, key)

    # Saving gives the object a PK (needed to create relations) and
    # refreshes the `modified` field, signalling that the Manga is
    # still there (shares semantics with `last_seen`).
    manga.save()

    # alt_name
    alt_name_rows = [{'name': alt} for alt in item['alt_name']]
    self._update_relation(manga, 'altname_set', 'name', alt_name_rows,
                          self._update_name)

    # genres
    genre_rows = [{'name': genre} for genre in item['genres']]
    self._update_relation(manga, 'genres', 'name', genre_rows,
                          self._update_name,
                          m2m=source.genre_set.all())

    # cover
    images = item['images']
    if images:
        cover_url = item['image_urls'][0]
        cover_name = os.path.basename(urllib.parse.urlparse(cover_url).path)
        stored_path = os.path.join(self.images_store, images[0]['path'])
        # The cover is always rewritten, so an image removed from the
        # MEDIA directory gets recreated.
        manga.cover.delete(save=False)
        with open(stored_path, 'rb') as handle:
            manga.cover.save(cover_name, File(handle))
    elif manga.cover:
        manga.cover.delete()

    # issues
    self._update_relation(manga, 'issue_set', 'url', item['issues'],
                          self._update_issue)