Пример #1
0
 def put(self):
     """Apply the JSON request body to the stored document.

     The document id is read from ``self.request.matchdict['id']`` and the
     new field values are decoded from ``self.request.body`` as JSON.
     Date fields present in the payload are converted from strings with
     ``convertStringToDateTime`` before the document is updated.

     Always returns ``1``.
     """
     document_id = self.request.matchdict['id']
     payload = json.loads(self.request.body)

     # Fields that arrive as strings but must be handed on as datetimes.
     date_fields = ('created',)
     for field in date_fields:
         if field not in payload:
             continue
         payload[field] = convertStringToDateTime(payload[field])

     document = self.db.find_one(document_id)
     document.update_from_dict(payload)
     return 1
Пример #2
0
 def put(self):
     """Apply the JSON request body to the stored document.

     The document id is read from ``self.request.matchdict['id']`` and the
     new field values are decoded from ``self.request.body`` as JSON.
     Date fields present in the payload are converted from strings with
     ``convertStringToDateTime`` before the document is updated.

     Always returns ``1``.
     """
     document_id = self.request.matchdict['id']
     payload = json.loads(self.request.body)

     # Fields that arrive as strings but must be handed on as datetimes.
     date_fields = ('created',)
     for field in date_fields:
         if field not in payload:
             continue
         payload[field] = convertStringToDateTime(payload[field])

     document = self.db.find_one(document_id)
     document.update_from_dict(payload)
     return 1
Пример #3
0
def date_converter(request):
    """Validate the optional ``created`` request parameter.

    Reads ``created`` from ``request.params``:

    * absent or empty: ``request.validated['created']`` is set to
      ``datetime.utcnow()``;
    * non-empty and parseable by ``convertStringToDateTime``: the parsed
      value is stored in ``request.validated['created']``;
    * non-empty but the parser raises: an error is recorded through
      ``request.errors.add`` and nothing is stored.

    Returns ``None`` in every case.
    """
    created_string = request.params.get('created', '')
    if not created_string:
        request.validated['created'] = datetime.utcnow()
        return

    try:
        created = convertStringToDateTime(created_string)
    except Exception:
        # Deliberately ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit propagate instead of being
        # reported to the client as a malformed date.
        request.errors.add('parameters', 'created',
                           'Cannot parse date format')
        return

    request.validated['created'] = created
Пример #4
0
def date_converter(request):
    """Validate the optional ``created`` request parameter.

    Reads ``created`` from ``request.params``:

    * absent or empty: ``request.validated['created']`` is set to
      ``datetime.utcnow()``;
    * non-empty and parseable by ``convertStringToDateTime``: the parsed
      value is stored in ``request.validated['created']``;
    * non-empty but the parser raises: an error is recorded through
      ``request.errors.add`` and nothing is stored.

    Returns ``None`` in every case.
    """
    created_string = request.params.get('created', '')
    if not created_string:
        request.validated['created'] = datetime.utcnow()
        return

    try:
        created = convertStringToDateTime(created_string)
    except Exception:
        # Deliberately ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit propagate instead of being
        # reported to the client as a malformed date.
        request.errors.add('parameters', 'created',
                           'Cannot parse date format')
        return

    request.validated['created'] = created
Пример #5
0
def handle_update(db, id, tikapath, version):
    """Extract metadata and text from a document and store it on the document.

    The document is looked up with ``db.find_one(id)``; its ``raw_data`` is
    written to a temporary file which is passed to
    ``/usr/bin/java -jar <tikapath> <tempfile>``. The process's stdout is
    parsed as XML and queried (XHTML namespace) for the ``Content-Type`` and
    ``Creation-Date`` meta tags, the children of ``<body>`` and all text
    nodes. Every non-empty result is written to the document, after which
    ``finish_parsing(version)`` and ``reindex()`` are called.

    The exit status of the Java process is not inspected.
    """
    doc = db.find_one(id)
    raw_bytes = doc.raw_data
    with NamedTemporaryFile() as raw_file:
        raw_file.write(raw_bytes)
        raw_file.seek(0)

        command = ['/usr/bin/java', '-jar', tikapath, raw_file.name]
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        stdout, _ = process.communicate()
        tree = etree.fromstring(stdout)

        namespaces = {'html': 'http://www.w3.org/1999/xhtml'}

        def xp(term):
            """Evaluate an XPath expression against the parsed output."""
            return tree.xpath(term, namespaces=namespaces)

        content_types = xp('//html:meta[@name="Content-Type"]/@content')

        creation_dates = xp('//html:meta[@name="Creation-Date"]/@content')
        created = None
        if creation_dates:
            created = convertStringToDateTime(creation_dates[0])

        body_children = xp('//html:body/*')
        full_html = None
        if body_children:
            full_html = ''.join(etree.tostring(node) for node in body_children)

        # Flatten all text nodes into a single whitespace-normalised string.
        joined_text = ' '.join(xp('//*/text()'))
        single_line = joined_text.replace('\n', ' ')
        text = texthelpers.replace_whitespace(single_line).strip()
        description = texthelpers.truncate(text, 100, '', whole_word=True)

        if content_types:
            doc.update_plugin_and_canonical_attr(
                'content_type', content_types[0])
        if created:
            doc.update_plugin_and_canonical_attr('created', created)
        if full_html:
            doc.update_plugin_attr('full_html', full_html)
            doc.register_html_representation('full_html')
        if text:
            doc.update_plugin('text', text)
            doc.register_searchable_field("text")
        if description:
            doc.update_plugin_and_canonical_attr('description', description)

        doc.finish_parsing(version)
        doc.reindex()
Пример #6
0
def handle_update(db, id, tikapath, version):
    """Extract metadata and text from a document and store it on the document.

    The document is looked up with ``db.find_one(id)``; its ``raw_data`` is
    written to a temporary file which is passed to
    ``/usr/bin/java -jar <tikapath> <tempfile>``. The process's stdout is
    parsed as XML and queried (XHTML namespace) for the ``Content-Type`` and
    ``Creation-Date`` meta tags, the children of ``<body>`` and all text
    nodes. Every non-empty result is written to the document, after which
    ``finish_parsing(version)`` and ``reindex()`` are called.

    The exit status of the Java process is not inspected.
    """
    doc = db.find_one(id)
    raw_bytes = doc.raw_data
    with NamedTemporaryFile() as raw_file:
        raw_file.write(raw_bytes)
        raw_file.seek(0)

        command = ['/usr/bin/java', '-jar', tikapath, raw_file.name]
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        stdout, _ = process.communicate()
        tree = etree.fromstring(stdout)

        namespaces = {'html': 'http://www.w3.org/1999/xhtml'}

        def xp(term):
            """Evaluate an XPath expression against the parsed output."""
            return tree.xpath(term, namespaces=namespaces)

        content_types = xp('//html:meta[@name="Content-Type"]/@content')

        creation_dates = xp('//html:meta[@name="Creation-Date"]/@content')
        created = None
        if creation_dates:
            created = convertStringToDateTime(creation_dates[0])

        body_children = xp('//html:body/*')
        full_html = None
        if body_children:
            full_html = ''.join(etree.tostring(node) for node in body_children)

        # Flatten all text nodes into a single whitespace-normalised string.
        joined_text = ' '.join(xp('//*/text()'))
        single_line = joined_text.replace('\n', ' ')
        text = texthelpers.replace_whitespace(single_line).strip()
        description = texthelpers.truncate(text, 100, '', whole_word=True)

        if content_types:
            doc.update_plugin_and_canonical_attr(
                'content_type', content_types[0])
        if created:
            doc.update_plugin_and_canonical_attr('created', created)
        if full_html:
            doc.update_plugin_attr('full_html', full_html)
            doc.register_html_representation('full_html')
        if text:
            doc.update_plugin('text', text)
            doc.register_searchable_field("text")
        if description:
            doc.update_plugin_and_canonical_attr('description', description)

        doc.finish_parsing(version)
        doc.reindex()