def remove_tags(self, s):
    d = {
        '–': '-',
        '·': '.',
        '\xa0': ' ',  # non-breaking space
        '“': '',
        '§': '',
        'Ä': u'Ä',
        'Ö': u'Ö',
        'Ü': u'Ü',
        'ä': u'ä',
        'ö': u'ö',
        'ü': u'ü',
        'ß': u'ß'
    }
    s = str(s)
    for x in d:
        s = s.replace(x, d[x])
    # Strip script/style blocks, then any remaining markup.
    s = standard.re_sub('<script(.*?)>(.|\\n|\\r|\\t)*?</script>', ' ', s)
    s = standard.re_sub('<style(.*?)>(.|\\n|\\r|\\t)*?</style>', ' ', s)
    s = standard.re_sub('<[^>]*>', ' ', s)
    # Normalize whitespace: tabs and line breaks become spaces, runs of spaces collapse.
    while s.find('\t') >= 0:
        s = s.replace('\t', ' ')
    while s.find('\n') >= 0:
        s = s.replace('\n', ' ')
    while s.find('\r') >= 0:
        s = s.replace('\r', ' ')
    while s.find('  ') >= 0:
        s = s.replace('  ', ' ')
    s = s.strip()
    return s
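# Illustrative sketch (not part of the original module): how remove_tags is
# expected to behave on a small markup fragment. The helper name and the way
# the connector instance is obtained are assumptions for demonstration only.
def _example_remove_tags(connector):
    # Script blocks and tags are dropped, whitespace is collapsed.
    raw = '<p>Lorem\t<b>ipsum</b></p>\n<script>var x = 1;</script>'
    return connector.remove_tags(raw)  # expected: 'Lorem ipsum'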
def search_xml(self, q, page_index=0, page_size=10, REQUEST=None, RESPONSE=None):
    """ ZMSZCatalogSolrConnector.search_xml """
    # Check constraints.
    zcm = self.getCatalogAdapter()
    attrs = zcm.getAttrs()
    page_index = int(page_index)
    page_size = int(page_size)
    REQUEST.set('lang', REQUEST.get('lang', self.getPrimaryLanguage()))
    RESPONSE = REQUEST.RESPONSE
    content_type = 'text/xml; charset=utf-8'
    RESPONSE.setHeader('Content-Type', content_type)
    RESPONSE.setHeader('Cache-Control', 'no-cache')
    RESPONSE.setHeader('Pragma', 'no-cache')
    RESPONSE.setHeader('Access-Control-Allow-Origin', '*')
    # Execute query.
    p = {}
    p['q'] = q
    p['wt'] = 'xml'
    p['start'] = page_index
    p['rows'] = page_size
    p['defType'] = 'edismax'
    p['qf'] = ' '.join(['%s^%s' % (self._get_field_name(x), standard.pystr(attrs[x].get('boost', 1.0))) for x in attrs])
    p['hl'] = 'true'
    p['hl.fragsize'] = self.getConfProperty('solr.select.hl.fragsize', 200)
    p['hl.fl'] = self.getConfProperty('solr.select.hl.fl', ','.join([self._get_field_name(x) for x in attrs]))
    p['hl.simple.pre'] = self.getConfProperty('solr.select.hl.simple.pre', '<span class="highlight">')
    p['hl.simple.post'] = self.getConfProperty('solr.select.hl.simple.post', '</span>')
    solr_url = self.getConfProperty('solr.url', 'http://localhost:8983/solr')
    solr_core = self.getConfProperty('solr.core', self.getAbsoluteHome().id)
    url = '%s/%s/select' % (solr_url, solr_core)
    url = self.url_append_params(url, p, sep='&')
    result = self.http_import(url, method='GET')
    result = standard.re_sub('name="(.*?)_[ist]"', 'name="\\1"', result)
    return result
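# Illustrative sketch (not part of the original module): search_xml needs a live
# Zope REQUEST because it sets response headers and reads the 'lang' parameter.
# The object name `connector` is an assumption; the keyword arguments mirror the
# method signature above.
def _example_search_xml(connector, REQUEST):
    # Returns the raw Solr XML response, with the _i/_s/_t field-name suffixes
    # already stripped and no-cache / CORS headers set on the RESPONSE.
    return connector.search_xml(q='lorem', page_index=0, page_size=10, REQUEST=REQUEST)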
def re_sub(self, pattern, replacement, subject, ignorecase=False):
    warn(self, 're_sub', 'Products.zms.standard.re_sub')
    return standard.re_sub(pattern, replacement, subject, ignorecase)
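# Illustrative sketch (not part of the original module): the wrapper above only
# emits a deprecation warning and delegates, so new code can call the module
# function directly, as the warning message suggests.
def _example_re_sub():
    from Products.zms import standard
    return standard.re_sub('<[^>]*>', ' ', '<b>bold</b>')  # should yield ' bold '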
def recurse_downloadHtmlPages(self, obj, path, lang, REQUEST):
    try:
        os.mkdir(path)
    except:
        pass
    level = obj.getLevel()
    key = 'index'
    REQUEST.set('ZMS_PATH_HANDLER', True)
    try:
        # Remember others.
        others = copy.copy(REQUEST.other.keys())
        root = getattr(obj, '__root__', None)
        if root is not None:
            REQUEST.set('ZMS_PROXY_%s' % root.id, obj)
            html = root.f_index_html(root, REQUEST)
        else:
            html = obj.f_index_html(obj, REQUEST)
        # Remove new others.
        for rk in REQUEST.other.keys():
            if rk not in others:
                try:
                    del REQUEST.other[rk]
                except:
                    pass
        # Blank lines in includes cause PHP session errors
        # @see http://bugs.php.net/bug.php?id=8974
        html = standard.re_sub(r'^\s*', '', html)
        # Localize html.
        html = localHtml(obj, html)
        # Save html to file.
        if level > 0 and \
           self.getConfProperty('ZMS.pathhandler', 0) != 0 and \
           self.getConfProperty('ZMS.export.pathhandler', 0) == 1:
            html = localIndexHtml(self, obj, level - 1, html)
            filename = '%s/../%s%s' % (path, obj.getDeclId(REQUEST), obj.getPageExt(REQUEST))
        else:
            pageext = obj.getPageExt(REQUEST)
            html = localIndexHtml(self, obj, level - self.getLevel(), html)
            filename = '%s/%s_%s%s' % (path, key, lang, pageext)
        html = self.exportExternalResources(obj, html, path, REQUEST)
        # @see http://docs.python.org/howto/unicode.html (Reading and Writing Unicode Data)
        encoding = REQUEST.get('ZMS_CHARSET', 'utf-8')
        mode = 'w'
        writeFile(obj, filename, html, mode, encoding)
        # Root folder requires and defaults to "index.html" on most systems.
        if lang == self.getPrimaryLanguage():
            filename = '%s/%s%s' % (path, key, obj.getPageExt(REQUEST))
            writeFile(obj, filename, html, mode, encoding)
    except:
        standard.writeError(obj, "[recurse_downloadHtmlPages]: Can't get html '%s'" % key)
    # Process methods of meta-objects.
    for metadictAttrId in self.getMetaobjAttrIds(obj.meta_id):
        try:
            metadictAttr = self.getMetaobjAttr(obj.meta_id, metadictAttrId)
            if metadictAttr is not None and metadictAttr['meta_type'] and metadictAttr['type'] in self.getMetaobjIds():
                metaObj = self.getMetaobj(metadictAttr['type'])
                if metaObj['type'] == 'ZMSResource':
                    for metadictObj in obj.getObjChildren(metadictAttr['id'], REQUEST):
                        for metaObjAttr in metaObj['attrs']:
                            if metaObjAttr['type'] in ['DTML Document', 'DTML Method', 'Page Template', 'Script (Python)']:
                                ob = getattr(obj, metaObjAttr['id'])
                                html = ob(obj, REQUEST)
                                html = localHtml(obj, html)
                                filename = '%s/%s' % (path, metaObjAttr['id'])
                                f = open(filename, 'w')
                                f.write(html)
                                f.close()
        except:
            standard.writeError(self, "[recurse_downloadHtmlPages]: can't process method '%s' of meta-object" % metadictAttr)
    # Process children.
    for child in obj.filteredChildNodes(REQUEST, self.PAGES):
        self.recurse_downloadHtmlPages(child, '%s/%s' % (path, child.getDeclId(REQUEST)), lang, REQUEST)
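# Illustrative sketch (not part of the original module): a typical top-level
# invocation, starting from a document root and a target export directory. The
# names `exporter`, `root` and `export_dir` are assumptions for demonstration;
# the call pattern mirrors the recursive call used inside the method above.
def _example_download_html(exporter, root, export_dir, REQUEST):
    lang = root.getPrimaryLanguage()
    REQUEST.set('lang', lang)
    exporter.recurse_downloadHtmlPages(root, export_dir, lang, REQUEST)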
def manage_zcatalog_update_documents(self):
    msg = []
    request = self.REQUEST
    RESPONSE = request.RESPONSE
    zmscontext = self.getLinkObj(request.get('uid', '{$}'))
    home = zmscontext.getDocumentElement()
    home_id = home.getPhysicalPath()
    home_id = home_id[home_id.index('content') - 1]
    inst_home = zmscontext.Control_Panel.getINSTANCE_HOME()
    path = home_id
    node = home
    while True:
        node = node.getPortalMaster()
        if node is None:
            break
        path = node.getHome().getId() + '/' + path
    xmlpath = '%s/var/%s/sitemap.xml' % (inst_home, path)
    xml = standard.localfs_read(xmlpath, mode={'threshold': -1}).decode('utf-8')
    msg.append('%i xml-bytes read from %s' % (len(xml), xmlpath))

    def update(action, xml):
        solr_url = zmscontext.getConfProperty('solr.url', 'http://*****:*****@@')
        if i >= 0:
            body = doc[i + len('">@@'):]
            body = body[:body.find('</field>')]
            id = standard.re_search('<field name="id"(.*?)>(.*?)<\/field>', doc)[1]
            bins.append({'id': id, 'body': body})
            doc = standard.re_sub('<field name="(.*?)_t"(.*?)>@@(.*?)<\/field>', '<field name="\\1_t"\\2></field>', doc)
            buff.append(doc)
            docs.remove(docs[0])

    msg.append(update("commit", get_command_xml('commit')))
    for b in bins:
        body = b['body'].split(':')
        ids = body[0].split('/')[1:]
        attr_name = body[1]
        node = home
        while ids:
            node = getattr(node, ids.pop(0))
        d = {
            'literal.id': b['id'],
            'commit': 'true',
            'myfile': node.attr(attr_name).getData(request)
        }
        msg.append(update_extract("extract", d))
    msg.append(update("optimize", get_command_xml('optimize')))
    RESPONSE.setHeader('Content-Type', 'text/plain;charset=utf-8')
    msg.append("Done!")
    return '\n'.join(msg)

# --// /manage_zcatalog_update_documents //--
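# Illustrative sketch (not part of the original module): the method above drives
# Solr's update handler with commit/extract/optimize requests. A generic,
# standalone way to send such a command using only the standard library could
# look like the following; the default URL and core name are placeholders taken
# from the configuration defaults elsewhere in this file, not values asserted by
# the original helpers (which are partly redacted above).
def _example_solr_command(solr_url='http://localhost:8983/solr', core='zms', command='commit'):
    import urllib.request
    # Solr's XML update format accepts bare <commit/> and <optimize/> commands.
    data = ('<%s/>' % command).encode('utf-8')
    req = urllib.request.Request('%s/%s/update' % (solr_url, core), data=data,
                                 headers={'Content-Type': 'text/xml; charset=utf-8'})
    with urllib.request.urlopen(req) as resp:
        return resp.read().decode('utf-8')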