def __iter__(self):
    """Yield every upstream item with its id and path normalized.

    For each item coming from ``self.previous`` the path key is looked up
    with ``self.pathkey``; items without a path key are passed through
    untouched.  Otherwise the last path segment and the item id are run
    through ``normalize_url``.  When something actually changed, the item
    is updated in place and the ``old path -> new path`` pair is recorded
    in ``self.transmogrifier.sc_transmogrifier_changed_paths`` (created on
    first use).
    """
    for item in self.previous:
        item_id = item.get("id", u"")
        pathkey = self.pathkey(*item.keys())[0]
        if not pathkey:
            yield item
            continue

        path = item[pathkey]
        # rpartition (rather than rsplit(...)[0]) keeps a path without
        # any "/" from being duplicated as its own parent: "foo" must
        # normalize to "foo", not to "foo/foo".
        parent, separator, last_segment = path.rstrip("/").rpartition("/")
        if not item_id:
            item_id = last_segment

        # Removes accented characters from composed characters
        # where possible - and suppresses non "alphanum_-" chars
        # (example: u"maçã" -> u"maca")
        normal_path = parent + separator + normalize_url(last_segment)
        normal_id = normalize_url(item_id)

        plone = get_plone_root(self.transmogrifier.context)
        if plone:
            s_id = str(normal_id)
            if hasattr(plone, s_id) and s_id not in plone.contentIds():
                # The id is taken by an attribute of the site root that
                # is not a content object: this condition would raise a
                # "BadRequest" exception, so pick another name.
                normal_id += u"_1"
                normal_path += u"_1"

        if normal_path == path and normal_id == item_id:
            # Nothing to fix for this item.
            yield item
            continue

        if "id" in item:
            item["id"] = normal_id
        item[pathkey] = normal_path

        # Record the rename on the transmogrifier object itself.
        if not hasattr(self.transmogrifier,
                       "sc_transmogrifier_changed_paths"):
            self.transmogrifier.sc_transmogrifier_changed_paths = {}
        self.transmogrifier.sc_transmogrifier_changed_paths[path] = \
            normal_path

        # NOTE(review): this stores the *normalized* path under
        # "_orig_path"; confirm that ``path`` was not the intended value.
        if "_orig_path" not in item:
            item["_orig_path"] = normal_path
        yield item
def transmogrify(self, item):
    """Fetch the images referenced by ``item["text"]`` and fix their refs.

    Every ``<img ... src="...">`` tag found in the item text is resolved
    with ``get_image_refs`` and downloaded with ``get_remote_image``.
    Depending on the section options the image is re-homed under the
    item (``embed_images``), the ``src`` reference in the text is
    rewritten (``replace_references``), and the image item is either
    pushed to ``self.storage["wormhole"]`` (``use_wormhole``) or
    collected in the returned list.

    Returns a ``(item, images)`` tuple; ``item["text"]`` holds the
    possibly rewritten text.

    Raises ``NothingToDoHere`` when the item has no text.
    """
    path = self.get_path(item)
    text = item.get("text", u"")
    if not text:
        raise NothingToDoHere
    # One never knows how those json read
    # strings end up:
    if not isinstance(text, unicode):
        text = text.decode("utf-8")

    # Same pattern value as before; spelled without the Python 2 only
    # ``ur`` prefix so that the literal parses on any interpreter.
    image_matches = re.finditer(
        u"""<img.*?src\\s*?=\\s*?"(.*?)".*?>""",
        text,
        re.DOTALL | re.MULTILINE,
    )
    images = []
    # Matches are processed in reverse order while ``text`` is spliced
    # using the offsets returned for each match.
    for match in reversed(list(image_matches)):
        # *** retrieve image URL and name data:
        start_tag, end_tag, img_title, rel_url, url = get_image_refs(
            match, path, self.source_prefix)
        if url == rel_url and not self.load_external_images:
            continue

        # *** retrieve remote image itself
        real_url, image, view_parts = get_remote_image(
            url, item, img_title, self.use_jsonmigrator)
        if real_url is None:
            continue
        image_filename = image["_filename"]

        # *** Fix image references in item's text html content:
        if real_url.startswith(self.source_prefix):
            img_path = real_url[len(self.source_prefix):]
        else:
            # the still-not-used external images case
            # FIXME: won't work for non folderish content types:
            img_path = path + "/" + url.rsplit("/")[-1]
        img_ref = img_path
        if self.embed_images:
            image["_orig_path"] = img_path
            img_path = path + "/" + image_filename
            img_ref = image_filename
        # Restore the reference to the
        # special view used in plone, after the
        # image file name:
        if view_parts:
            img_ref += "/%s/" % ("/".join(view_parts))
        image["_path"] = img_path

        if self.replace_references and img_path != rel_url:
            # Only byte strings need decoding: calling .decode() on a
            # unicode object makes Python 2 encode it as ASCII first,
            # which fails for non-ASCII file names.
            if isinstance(img_ref, unicode):
                img_ref_unicode = img_ref
            else:
                img_ref_unicode = img_ref.decode("utf-8")
            # FIXME: For now, sc.transmogrifier
            # fix_path blueprint is not keeping
            # references in texts in sync
            # so, we assume it is being used on the pipeline
            # and normalize the url here
            # (should not affect well behaved urls anyway)
            img_ref_unicode = normalize_url(img_ref_unicode)
            text = text[:start_tag] + img_ref_unicode + text[end_tag:]

        # *** Set image to be put in the pipeline
        if self.use_wormhole:
            self.storage["wormhole"].push(image)
        else:
            images.append(image)

    item["text"] = text
    return item, images