def __init__(self, baseurl, node, client=None):
    """Walk the tree under ``baseurl`` and index every directory and file.

    For each directory a ``Url`` is created and attached to a new node under
    its parent's node.  For each file a ``Url`` and a ``Resource`` are
    created, the file is added to a per-directory HTML link listing, and the
    ``Url`` is registered by relative path and by MIME type.  The resulting
    index is stored in ``self.resources`` as
    ``{'urls': {relpath: Url}, 'mimes': {mime: [Url, ...]}}``.

    If ``self.cancel`` becomes true while scanning, the constructor returns
    early and ``self.resources`` is NOT set.

    :param baseurl: root directory to scan, as a byte string in the
        filesystem encoding (it is decoded here).
    :param node: node under which the top-level directory node is created.
    :param client: optional object with a ``call`` method used to report
        progress; no progress is reported when it is falsy.
    """
    self.baseurl = baseurl.decode(sys.getfilesystemencoding())
    self.node = node
    self.client = client
    self.numdirs = 0

    resources = {'mimes': {}, 'urls': {}}

    top = Url(self.baseurl, self.baseurl)
    top.createNode(node, _('Contents of directory'))
    resources['urls'][top.relpath] = top

    # First pass: only count the directories so that the second pass can
    # report "directory i of N" progress.
    for _root, _dirs, _files in self._safewalk(self.baseurl):
        if self.cancel:
            return
        self.numdirs += 1

    # Second pass: build the index.
    walker = self._safewalk(self.baseurl)
    for index, (root, dirs, files) in enumerate(walker, 1):
        listing = []    # HTML fragments, one per file in this directory
        idevice = None  # created lazily, on the first file encountered

        if self.client:
            # NOTE: "Analizing" is misspelled, but the string is a gettext
            # message id; changing it here would break existing
            # translations, so it is kept verbatim.
            self.client.call(
                'eXe.app.getController("Toolbar").updateImportProgressWindow',
                _(u'Analizing directory %d of %d: %s') % (
                    index, self.numdirs,
                    root.encode(sys.getfilesystemencoding())))

        for dirname in dirs:
            if self.cancel:
                return
            path = root + os.path.sep + dirname
            url = Url(path, self.baseurl)
            # The parent directory was registered on an earlier iteration
            # (or is the top-level entry registered above).
            url.createNode(resources['urls'][url.parentpath].node)
            resources['urls'][url.relpath] = url

        for filename in files:
            if self.cancel:
                return
            path = root + os.path.sep + filename
            url = Url(path, self.baseurl)
            parent = resources['urls'][url.parentpath]
            if not idevice:
                idevice = parent.createIdevice()
            try:
                resource_path = Path(path)
                resource_path.setSalt(str(url))
                resource = Resource(idevice, resource_path)
            except Exception:
                # Best effort: a file that cannot be turned into a resource
                # is skipped.  Unlike a bare ``except``, this does not
                # swallow KeyboardInterrupt / SystemExit.
                continue
            url.href = 'resources/%s' % (quote(resource.storageName))
            # The anchor is now properly closed (it used to lack "</a>").
            listing.append(
                u"<p><a href=%s>%s</a></p>\n" % (url.href, url.basename))
            resources['urls'][url.relpath] = url
            resources['mimes'].setdefault(url.mime, []).append(url)

        if idevice:
            idevice.setContent(u"".join(listing))

    self.resources = resources
def _insertNode(self, node, url, depth=0, idevice=None):
    """Recursively insert ``url`` and the pages it links to into the package.

    Steps, in order:

    1. Resolve ``url``: a ``str`` is looked up in ``self.resources['urls']``;
       a ``Link`` supplies both the link and its target url.
    2. For an HTML page within its allowed depth, create a child node
       (except for frame/iframe targets) and a ``FreeTextIdevice`` on it.
    3. For a file, create a ``Resource`` and rewrite the referring page's
       content so the link points at ``###resources###/<storageName>``.
    4. Recurse into ``url.links`` with ``depth + 1``.
    5. Replace the ``###resources###/`` placeholder in the page content,
       push the content into the idevice (HTML only) and rewrite the stored
       copy of the file.

    :param node: node under which to insert; ``self.node`` is used when it
        is falsy.
    :param url: key into ``self.resources['urls']`` (``str``), a ``Link``,
        or an already-resolved url object.
    :param depth: current recursion depth, compared with
        ``self.depths[str(url)]``.
    :param idevice: idevice of the referring page, if any; used as parent
        for frame/iframe content and as owner of created resources.
    :return: ``None``.  Returns early when ``self.cancel`` is set or when
        ``url`` is deeper than its recorded depth.
    """
    if self.cancel:
        return

    # Default to "no link" so an already-resolved url object does not leave
    # ``link`` unbound further down.
    link = None
    if isinstance(url, str):
        url = self.resources['urls'][url]
    elif isinstance(url, Link):
        link = url
        url = link.url

    if url.mime == 'text/html' and self.depths[str(url)] >= depth:
        if self.client:
            self.client.call(
                'eXe.app.getController("Toolbar").updateImportProgressWindow',
                _(u'Inserting %s') % (str(url)))

        tag_name = link.tag.name if link and link.tag else 'a'
        is_frame = tag_name in ['frame', 'iframe']
        if not is_frame and node:
            node = node.createChild()
            node.setTitle(self._guessName(url))
            if depth == 1:
                node.up()
        if not node:
            node = self.node
        # Frame content is nested under the idevice of the framing page.
        parent = idevice if is_frame else None
        idevice = FreeTextIdevice(type=tag_name, parent=parent)
        idevice.edit = False
        node.addIdevice(idevice)

    resource = None
    if url.type == "file":
        resource_path = Path(self.baseurl + os.path.sep + str(url))
        resource_path.setSalt(str(url))
        resource = Resource(idevice, resource_path)
        url.storageName = quote(resource.storageName)
        if link and link.relative not in link.referrer.contentUpdated:
            # Prefer the exact matched text when available, otherwise fall
            # back to the relative link.
            target = link.match if link.match else link.relative
            link.referrer.content = link.referrer.content.replace(
                target, '###resources###/%s' % (url.storageName))
            # Remember the rewrite so the same link is not replaced twice.
            link.referrer.contentUpdated.append(link.relative)

    if self.depths[str(url)] < depth:
        return

    for child_link in url.links:
        if self.cancel:
            return
        self._insertNode(node, child_link, depth + 1, idevice)

    # Fetched only after the recursion above, because inserting the linked
    # resources rewrites this page's content (see ``link.referrer.content``).
    content = url.getContent()
    if content:
        content_w_resourcePaths = content.replace(
            '###resources###/', 'resources/')
        content_wo_resourcePaths = content.replace('###resources###/', '')

        if url.mime == "text/html" and idevice:
            soup = url.getSoup()
            if soup and soup.declaredHTMLEncoding:
                # The file is written back as UTF-8, so the declared
                # encoding must say so.  The flag used to be passed as
                # re.sub's positional ``count`` argument, which made the
                # match case-sensitive and capped it at two replacements.
                declared = re.compile(
                    re.escape(soup.declaredHTMLEncoding), re.IGNORECASE)
                content_w_resourcePaths = declared.sub(
                    'utf-8', content_w_resourcePaths)
                content_wo_resourcePaths = declared.sub(
                    'utf-8', content_wo_resourcePaths)
            if soup and soup.find('frameset'):
                idevice.type = 'frameset'
            idevice.setContent(content_w_resourcePaths,
                               content_wo_resourcePaths)

        # Only urls of type "file" have a stored copy to rewrite; for other
        # types no resource was created above.
        if resource is not None:
            with open(resource.path, "w") as stored:
                stored.write(content_wo_resourcePaths.encode('utf-8'))
def _insertNode(self, node, url, depth=0, idevice=None):
    """Recursively insert ``url`` and the pages it links to into the package.

    Steps, in order:

    1. Resolve ``url``: a ``str`` is looked up in ``self.resources['urls']``;
       a ``Link`` supplies both the link and its target url.
    2. For an HTML page within its allowed depth, create a child node
       (except for frame/iframe targets) and a ``FreeTextIdevice`` on it.
    3. For a file, create a ``Resource`` and rewrite the referring page's
       content so the link points at ``###resources###/<storageName>``.
    4. Recurse into ``url.links`` with ``depth + 1``.
    5. Replace the ``###resources###/`` placeholder in the page content,
       push the content into the idevice (HTML only) and rewrite the stored
       copy of the file.

    :param node: node under which to insert; ``self.node`` is used when it
        is falsy.
    :param url: key into ``self.resources['urls']`` (``str``), a ``Link``,
        or an already-resolved url object.
    :param depth: current recursion depth, compared with
        ``self.depths[str(url)]``.
    :param idevice: idevice of the referring page, if any; used as parent
        for frame/iframe content and as owner of created resources.
    :return: ``None``.  Returns early when ``self.cancel`` is set or when
        ``url`` is deeper than its recorded depth.
    """
    if self.cancel:
        return

    # Default to "no link" so an already-resolved url object does not leave
    # ``link`` unbound further down.
    link = None
    if isinstance(url, str):
        url = self.resources['urls'][url]
    elif isinstance(url, Link):
        link = url
        url = link.url

    if url.mime == 'text/html' and self.depths[str(url)] >= depth:
        if self.client:
            self.client.call(
                'eXe.app.getController("Toolbar").updateImportProgressWindow',
                _(u'Inserting %s') % (str(url)))

        tag_name = link.tag.name if link and link.tag else 'a'
        is_frame = tag_name in ['frame', 'iframe']
        if not is_frame and node:
            node = node.createChild()
            node.setTitle(self._guessName(url))
            if depth == 1:
                node.up()
        if not node:
            node = self.node
        # Frame content is nested under the idevice of the framing page.
        parent = idevice if is_frame else None
        idevice = FreeTextIdevice(type=tag_name, parent=parent)
        idevice.edit = False
        node.addIdevice(idevice)

    resource = None
    if url.type == "file":
        resource_path = Path(self.baseurl + os.path.sep + str(url))
        resource_path.setSalt(str(url))
        resource = Resource(idevice, resource_path)
        url.storageName = quote(resource.storageName)
        if link and link.relative not in link.referrer.contentUpdated:
            # Prefer the exact matched text when available, otherwise fall
            # back to the relative link.
            target = link.match if link.match else link.relative
            link.referrer.content = link.referrer.content.replace(
                target, '###resources###/%s' % (url.storageName))
            # Remember the rewrite so the same link is not replaced twice.
            link.referrer.contentUpdated.append(link.relative)

    if self.depths[str(url)] < depth:
        return

    for child_link in url.links:
        if self.cancel:
            return
        self._insertNode(node, child_link, depth + 1, idevice)

    # Fetched only after the recursion above, because inserting the linked
    # resources rewrites this page's content (see ``link.referrer.content``).
    content = url.getContent()
    if content:
        content_w_resourcePaths = content.replace(
            '###resources###/', 'resources/')
        content_wo_resourcePaths = content.replace('###resources###/', '')

        if url.mime == "text/html" and idevice:
            soup = url.getSoup()
            if soup and soup.declaredHTMLEncoding:
                # The file is written back as UTF-8, so the declared
                # encoding must say so.  The flag used to be passed as
                # re.sub's positional ``count`` argument, which made the
                # match case-sensitive and capped it at two replacements.
                declared = re.compile(
                    re.escape(soup.declaredHTMLEncoding), re.IGNORECASE)
                content_w_resourcePaths = declared.sub(
                    'utf-8', content_w_resourcePaths)
                content_wo_resourcePaths = declared.sub(
                    'utf-8', content_wo_resourcePaths)
            if soup and soup.find('frameset'):
                idevice.type = 'frameset'
            idevice.setContent(content_w_resourcePaths,
                               content_wo_resourcePaths)

        # Only urls of type "file" have a stored copy to rewrite; for other
        # types no resource was created above.
        if resource is not None:
            with open(resource.path, "w") as stored:
                stored.write(content_wo_resourcePaths.encode('utf-8'))