def put(self):
    """Apply the JSON request body to the document named in the URL.

    The document id is read from ``self.request.matchdict['id']`` and the
    request body is decoded as JSON.  When the decoded payload carries a
    ``created`` entry, it is passed through ``convertStringToDateTime``
    before the payload is handed to the document's ``update_from_dict``.

    Returns:
        Always ``1``.
    """
    # Read the id first so a missing route parameter fails before the body
    # is parsed.
    docid = self.request.matchdict['id']
    payload = json.loads(self.request.body)

    # 'created' is the only field that needs converting from its string form.
    if 'created' in payload:
        payload['created'] = convertStringToDateTime(payload['created'])

    target = self.db.find_one(docid)
    target.update_from_dict(payload)
    return 1
def date_converter(request):
    """Validate the optional ``created`` request parameter.

    Reads ``request.params['created']``.  When it is missing or empty,
    ``request.validated['created']`` is set to ``datetime.utcnow()``.
    Otherwise the string is parsed with ``convertStringToDateTime`` and the
    result is stored under the same key.

    If parsing raises, an error is recorded through ``request.errors.add``
    and ``request.validated`` is left untouched.

    Args:
        request: Object exposing ``params``, ``validated`` and ``errors``.

    Returns:
        None.  Results are written to ``request.validated`` or
        ``request.errors``.
    """
    created_string = request.params.get('created', '')

    if not created_string:
        request.validated['created'] = datetime.utcnow()
        return

    try:
        created = convertStringToDateTime(created_string)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported to the client
        # as a malformed date.
        request.errors.add('parameters', 'created',
                           'Cannot parse date format')
        return

    request.validated['created'] = created
def handle_update(
    db,
    id,
    tikapath,
    version,
):
    """Analyse a stored document with an external jar and record the result.

    The document is fetched with ``db.find_one(id)`` and its ``raw_data`` is
    written to a temporary file.  ``/usr/bin/java -jar <tikapath> <tmpfile>``
    is then run and its standard output parsed as XML.  From that tree the
    content type, creation date, body markup, plain text and a short
    description are extracted and stored on the document, after which
    ``finish_parsing(version)`` and ``reindex()`` are called.

    Args:
        db: Object whose ``find_one`` returns the document for ``id``.
        id: Identifier of the document to analyse.
        tikapath: Path of the jar handed to ``java -jar`` (presumably
            Apache Tika, going by the name; confirm against the caller).
        version: Value forwarded unchanged to ``doc.finish_parsing``.
    """
    document = db.find_one(id)
    raw = document.raw_data

    # The temporary file only has to exist while the child process runs.
    with NamedTemporaryFile() as scratch:
        scratch.write(raw)
        # Kept from the original: rewinding is expected to push the buffered
        # bytes out to disk before the child opens the file by name.
        scratch.seek(0)
        command = ['/usr/bin/java', '-jar', tikapath, scratch.name]
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        output, _ = process.communicate()

    root = etree.fromstring(output)
    namespaces = dict(html='http://www.w3.org/1999/xhtml')

    def select(expression):
        """Evaluate an XPath expression with the ``html`` prefix bound."""
        return root.xpath(expression, namespaces=namespaces)

    # Gather every value first so the document is only touched once all of
    # the extraction steps have succeeded.
    content_type = select('//html:meta[@name="Content-Type"]/@content')

    creation_dates = select('//html:meta[@name="Creation-Date"]/@content')
    created = None
    if creation_dates:
        created = convertStringToDateTime(creation_dates[0])

    body_nodes = select('//html:body/*')
    # Joining zero nodes gives '', which is falsy just like the empty list.
    full_html = ''.join(etree.tostring(node) for node in body_nodes)

    joined_text = ' '.join(select('//*/text()'))
    flattened = joined_text.replace('\n', ' ')
    text = texthelpers.replace_whitespace(flattened).strip()
    description = texthelpers.truncate(text, 100, '', whole_word=True)

    if content_type:
        document.update_plugin_and_canonical_attr(
            'content_type', content_type[0])
    if created:
        document.update_plugin_and_canonical_attr('created', created)
    if full_html:
        document.update_plugin_attr('full_html', full_html)
        document.register_html_representation('full_html')
    if text:
        # NOTE(review): this calls ``update_plugin`` while the HTML branch
        # uses ``update_plugin_attr``; confirm both methods exist.
        document.update_plugin('text', text)
        document.register_searchable_field("text")
    if description:
        document.update_plugin_and_canonical_attr('description', description)

    document.finish_parsing(version)
    document.reindex()
def handle_update(
    db,
    id,
    tikapath,
    version,
):
    """Analyse a stored document with an external jar and record the result.

    The document is fetched with ``db.find_one(id)`` and its ``raw_data`` is
    written to a temporary file.  ``/usr/bin/java -jar <tikapath> <tmpfile>``
    is then run and its standard output parsed as XML.  From that tree the
    content type, creation date, body markup, plain text and a short
    description are extracted and stored on the document, after which
    ``finish_parsing(version)`` and ``reindex()`` are called.

    Args:
        db: Object whose ``find_one`` returns the document for ``id``.
        id: Identifier of the document to analyse.
        tikapath: Path of the jar handed to ``java -jar`` (presumably
            Apache Tika, going by the name; confirm against the caller).
        version: Value forwarded unchanged to ``doc.finish_parsing``.
    """
    document = db.find_one(id)
    raw = document.raw_data

    # The temporary file only has to exist while the child process runs.
    with NamedTemporaryFile() as scratch:
        scratch.write(raw)
        # Kept from the original: rewinding is expected to push the buffered
        # bytes out to disk before the child opens the file by name.
        scratch.seek(0)
        command = ['/usr/bin/java', '-jar', tikapath, scratch.name]
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        output, _ = process.communicate()

    root = etree.fromstring(output)
    namespaces = dict(html='http://www.w3.org/1999/xhtml')

    def select(expression):
        """Evaluate an XPath expression with the ``html`` prefix bound."""
        return root.xpath(expression, namespaces=namespaces)

    # Gather every value first so the document is only touched once all of
    # the extraction steps have succeeded.
    content_type = select('//html:meta[@name="Content-Type"]/@content')

    creation_dates = select('//html:meta[@name="Creation-Date"]/@content')
    created = None
    if creation_dates:
        created = convertStringToDateTime(creation_dates[0])

    body_nodes = select('//html:body/*')
    # Joining zero nodes gives '', which is falsy just like the empty list.
    full_html = ''.join(etree.tostring(node) for node in body_nodes)

    joined_text = ' '.join(select('//*/text()'))
    flattened = joined_text.replace('\n', ' ')
    text = texthelpers.replace_whitespace(flattened).strip()
    description = texthelpers.truncate(text, 100, '', whole_word=True)

    if content_type:
        document.update_plugin_and_canonical_attr(
            'content_type', content_type[0])
    if created:
        document.update_plugin_and_canonical_attr('created', created)
    if full_html:
        document.update_plugin_attr('full_html', full_html)
        document.register_html_representation('full_html')
    if text:
        # NOTE(review): this calls ``update_plugin`` while the HTML branch
        # uses ``update_plugin_attr``; confirm both methods exist.
        document.update_plugin('text', text)
        document.register_searchable_field("text")
    if description:
        document.update_plugin_and_canonical_attr('description', description)

    document.finish_parsing(version)
    document.reindex()