Пример #1
0
def edit(resnum):
    """Send the search result at index ``resnum`` as a file download.

    Re-runs the current query, fetches the ``resnum``-th result, sets the
    Content-Type and Content-Disposition response headers from the result
    document, and returns an open binary file object holding its data.

    Args:
        resnum: Zero-based index of the wanted result in the query results.

    Returns:
        An open file object for the document data, or an error message
        string when ``hasrclextract`` is false or ``resnum`` is out of range.
    """
    if not hasrclextract:
        return 'Sorry, needs recoll version 1.19 or later'
    query = get_query()
    # NOTE(review): qs is never used below -- confirm this call is needed.
    qs = query_to_recoll_string(query)
    rclq = recoll_initsearch(query)
    # Reject negative indexes too, not only indexes past the last row.
    if resnum < 0 or resnum > rclq.rowcount - 1:
        return 'Bad result index %d' % resnum
    rclq.scroll(resnum)
    doc = rclq.fetchone()
    bottle.response.content_type = doc.mimetype
    # Quote the file name so that names containing spaces stay in one piece.
    bottle.response.headers['Content-Disposition'] = \
        'attachment; filename="%s"' % doc.filename
    pathismine = False
    if doc.ipath == '':
        # If ipath is empty, we can just return the file itself.
        path = doc.url.replace('file://', '')
    else:
        # Otherwise have the extractor write the document out to a file,
        # which this function is then responsible for removing.
        xt = rclextract.Extractor(doc)
        path = xt.idoctofile(doc.ipath, doc.mimetype)
        pathismine = True
    sys.stderr.write(
        "Sending %s with mimetype %s\n" % (path, doc.mimetype))
    # Binary mode: the data is served under its own mimetype and must not go
    # through text decoding or newline translation.
    f = open(path, 'rb')
    if pathismine:
        # Remove the extracted file right away; the already-open handle is
        # what gets returned to the caller.
        os.unlink(path)
    return f
Пример #2
0
def edit(resnum):
    """Send the search result at index ``resnum`` as a file download.

    The result document is always written out to a file through
    ``rclextract.Extractor.idoctofile``; that file is opened in binary mode,
    unlinked, and the open handle is returned. Content-Type,
    Content-Disposition and Content-Length response headers are set on the
    way.

    Returns the error string ``'Bad result index N'`` when ``resnum`` is
    greater than the last row index.
    """
    query = get_query()
    qs = query_to_recoll_string(query)
    search = recoll_initsearch(query)
    if resnum > search.rowcount - 1:
        return 'Bad result index %d' % resnum
    search.scroll(resnum)
    doc = search.fetchone()
    bottle.response.content_type = doc.mimetype

    # The extracted file always belongs to us, so it is always removed below.
    extracted_path = rclextract.Extractor(doc).idoctofile(doc.ipath,
                                                          doc.mimetype)

    # Use the document's own file name only when ipath is empty and the
    # document exposes a "filename" key; otherwise use the extracted file's
    # base name.
    if doc.ipath or "filename" not in doc.keys():
        download_name = os.path.basename(extracted_path)
    else:
        download_name = doc.filename

    bottle.response.headers['Content-Disposition'] = (
        'attachment; filename="%s"' % download_name)
    bottle.response.headers['Content-Length'] = \
        os.stat(extracted_path).st_size

    handle = open(extracted_path, 'rb')
    os.unlink(extracted_path)
    return handle
Пример #3
0
def edit(resnum):
    """Send the search result at index ``resnum`` as a file download.

    Re-runs the current query, fetches the ``resnum``-th result, sets the
    Content-Type, Content-Disposition and Content-Length response headers,
    and returns an open binary file object holding the document data.

    Args:
        resnum: Zero-based index of the wanted result in the query results.

    Returns:
        An open file object for the document data, or an error message
        string when ``hasrclextract`` is false or ``resnum`` is out of range.
    """
    if not hasrclextract:
        return 'Sorry, needs recoll version 1.19 or later'
    query = get_query()
    # NOTE(review): qs is never used below -- confirm this call is needed.
    qs = query_to_recoll_string(query)
    rclq = recoll_initsearch(query)
    # Reject negative indexes too, not only indexes past the last row.
    if resnum < 0 or resnum > rclq.rowcount - 1:
        return 'Bad result index %d' % resnum
    rclq.scroll(resnum)
    doc = rclq.fetchone()
    bottle.response.content_type = doc.mimetype
    pathismine = False
    if doc.ipath == '':
        # If ipath is empty, we can just return the file itself.
        path = doc.url.replace('file://', '')
    else:
        # Else this is a subdocument, extract to temporary file.
        xt = rclextract.Extractor(doc)
        path = xt.idoctofile(doc.ipath, doc.mimetype)
        pathismine = True
    # NOTE(review): the .encode('utf-8') calls look like Python 2 handling of
    # unicode paths; confirm before running this under Python 3.
    bottle.response.headers['Content-Disposition'] = \
        'attachment; filename="%s"' % os.path.basename(path).encode('utf-8')
    path = path.encode('utf-8')
    bottle.response.headers['Content-Length'] = os.stat(path).st_size
    # Binary mode: Content-Length is the on-disk byte size, so the payload
    # must be sent byte-for-byte with no text decoding or newline translation.
    f = open(path, 'rb')
    if pathismine:
        # Remove the temporary file right away; the already-open handle is
        # what gets returned to the caller.
        os.unlink(path)
    return f
Пример #4
0
async def recoll_packet_text(config, resnum, query, searchtype, dir, sort,
                             ascending, page):
    """Extract the text of the search result at index ``resnum``.

    The query is wrapped with ``wrap_query`` for the given search type, run
    through ``recoll_initsearch``, and the selected result is passed to
    ``rclextract.Extractor.textextract``.

    Returns:
        A ``(text, name)`` tuple, where ``name`` is
        ``render_packet_name(doc.filename)``; or an error message string when
        ``hasrclextract`` is false or ``resnum`` is past the last result.
        ``page`` is accepted but not used in this function.
    """
    if not hasrclextract:
        return 'Sorry, needs recoll version 1.19 or later'

    wrapped_query = wrap_query(query, searchtype)
    results = recoll_initsearch(config, wrapped_query, dir, sort, ascending)
    if resnum > results.rowcount - 1:
        return 'Bad result index %d' % resnum

    results.scroll(resnum)
    hit = results.fetchone()
    extracted = rclextract.Extractor(hit).textextract(hit.ipath)
    return extracted.text, render_packet_name(hit.filename)
Пример #5
0
def preview(resnum):
    """Return the extracted text of the search result at index ``resnum``.

    Sets the response Content-Type to HTML when the extracted document's
    mimetype is ``text/html`` and to plain text otherwise, both with a UTF-8
    charset. Returns an error message string when ``resnum`` is past the last
    result.
    """
    query = get_query()
    qs = query_to_recoll_string(query)
    search = recoll_initsearch(query)
    if resnum > search.rowcount - 1:
        return 'Bad result index %d' % resnum

    search.scroll(resnum)
    hit = search.fetchone()
    extracted = rclextract.Extractor(hit).textextract(hit.ipath)

    is_html = extracted.mimetype == 'text/html'
    bottle.response.content_type = (
        'text/html; charset=utf-8' if is_html
        else 'text/plain; charset=utf-8')
    return extracted.text
Пример #6
0
def extractofile(doc, outfilename=""):
    """Write ``doc`` out to a file and return the resulting file name.

    Delegates to ``rclextract.Extractor.idoctofile`` with the document's
    ipath and mimetype, passing ``outfilename`` through as ``ofilename``.
    """
    return rclextract.Extractor(doc).idoctofile(
        doc.ipath, doc.mimetype, ofilename=outfilename)
Пример #7
0
 def textextract(self, index):
     """Print the extracted text of the search result stored at ``index``."""
     result = self.searchResults[index]
     extracted = rclextract.Extractor(result).textextract(result.ipath)
     print(extracted.text)
Пример #8
0
def utf8string(s):
    """Return ``s`` unchanged when ``ISP3`` is true, else ``s`` UTF-8 encoded."""
    return s if ISP3 else s.encode('utf8')


# Open the recoll index and create a query object on it.
db = recoll.connect()
query = db.query()

# This normally has only one result, a well-known html file
nres = query.execute("HtmlAttachment_uniqueTerm", stemming=0)
print("Result count: %d %d" % (nres, query.rowcount))
doc = query.fetchone()
# Replace the result document with its extracted-text counterpart.
xtrac = rclextract.Extractor(doc)
doc = xtrac.textextract(doc.ipath)
print("Text length: %d" % len(doc.text))

# NOTE(review): presumably the expected MD5 of the extracted pdf data; it is
# not used in the visible part of this script -- confirm.
refdigest = 'bfbb63f7a245c31767585b45014dbd07'

# This normally has 2 results, one of which is a pdf attachment.
nres = query.execute("population_size_cultural_transmission", stemming=0)
for doc in query:
    if doc.mimetype == 'application/pdf':
        # Write the pdf out to a file and read its raw bytes back.
        xtrac = rclextract.Extractor(doc)
        filename = xtrac.idoctofile(doc.ipath, doc.mimetype)
        # The context manager closes the file even if read() raises.
        with open(filename, 'rb') as f:
            data = f.read()
        m = hashlib.md5()
Пример #9
0
def extract(doc):
    extractor = rclextract.Extractor(doc)
    newdoc = extractor.textextract(doc.ipath)
    return newdoc