示例#1
0
def upgrade(request):
    """Migrate every stored document to schema version 5, step by step.

    Each pass selects the documents at one schema version, applies that
    version's change and saves them under the next version number:

    * unversioned -> 1: stamp ``version`` and ``created``.
    * 1 -> 2: replace ``searchable_text`` by ``search_terms`` and
      ``language``; documents without ``searchable_text`` are only bumped.
    * 2 -> 3: lower-case ``search_terms``.
    * 3 -> 4: add an empty ``keywords`` list.
    * 4 -> 5: set ``already_scanned``.

    :param request: request object exposing the document collection as
        ``request.db.docs``.
    :returns: ``{"success": 1}``.
    """
    doc_list = request.db.docs

    for doc in doc_list.find():
        if "version" in doc:
            # Already versioned and untouched here: skip the pointless write.
            continue
        doc["version"] = 1
        doc["created"] = datetime.utcnow()
        doc_list.save(doc)

    for doc in doc_list.find({"version": 1}):
        if "searchable_text" in doc:
            searchable_text = doc.pop("searchable_text")
            lang = guessLanguage(searchable_text)
            # Materialise the terms as a list, like every other caller of
            # index() does, so the stored value is a plain sequence.
            doc["search_terms"] = list(
                index(searchable_text + " " + doc["title"], [lang]))
            doc["language"] = lang
        doc["version"] = 2
        doc_list.save(doc)

    for doc in doc_list.find({"version": 2}):
        doc["version"] = 3
        # Documents bumped to version 2 without "searchable_text" may never
        # have received search terms; treat that as an empty list instead
        # of aborting the whole migration with a KeyError.
        doc["search_terms"] = [
            term.lower() for term in doc.get("search_terms", [])
        ]
        doc_list.save(doc)

    for doc in doc_list.find({"version": 3}):
        doc["version"] = 4
        doc["keywords"] = []
        doc_list.save(doc)

    for doc in doc_list.find({"version": 4}):
        doc["version"] = 5
        doc["already_scanned"] = True
        doc_list.save(doc)

    return {"success": 1}
示例#2
0
def list_view(request):
    """List stored documents, newest first, ten per page.

    Request parameters read:

    * ``filter``  - free text; its indexed terms are matched against
      ``search_terms``.
    * ``keyword`` - a single keyword matched against ``keywords``.
    * ``page``    - 1-based page number; missing, non-numeric or
      non-positive values fall back to the first page.

    :returns: dict with ``docs`` (the requested page) and
        ``distinct_keywords`` (sorted list of all keywords in use).
    """
    params = request.params
    query_args = {}
    if "filter" in params:
        query_args["search_terms"] = {"$in": list(index(params["filter"]))}
    if "keyword" in params:
        query_args["keywords"] = {"$in": [params["keyword"]]}

    docs = request.db.docs.find(spec=query_args)
    docs.sort("created", DESCENDING)
    item_count = docs.count()

    # "page" comes straight from the query string: a non-numeric value
    # must not raise, and a value below 1 would turn into a negative
    # slice on the cursor below.
    try:
        page = int(params.get("page", 1))
    except (TypeError, ValueError):
        page = 1
    page = max(page, 1)
    items_per_page = 10

    first = (page - 1) * items_per_page
    url_maker = PageURL_WebOb(request)
    docs = Page(
        list(docs[first:first + items_per_page]),
        url=url_maker,
        page=page,
        items_per_page=items_per_page,
        item_count=item_count,
        presliced_list=True,
        )

    distinct_keywords = request.db.docs.distinct("keywords")
    distinct_keywords.sort()
    return {"docs": docs, "distinct_keywords": distinct_keywords}
示例#3
0
def list_view(request):
    """List stored documents, newest first, ten per page.

    Request parameters read:

    * ``filter``  - free text; its indexed terms are matched against
      ``search_terms``.
    * ``keyword`` - a single keyword matched against ``keywords``.
    * ``page``    - 1-based page number; missing, non-numeric or
      non-positive values fall back to the first page.

    :returns: dict with ``docs`` (the requested page) and
        ``distinct_keywords`` (sorted list of all keywords in use).
    """
    params = request.params
    query_args = {}
    if "filter" in params:
        query_args["search_terms"] = {"$in": list(index(params["filter"]))}
    if "keyword" in params:
        query_args["keywords"] = {"$in": [params["keyword"]]}

    docs = request.db.docs.find(spec=query_args)
    docs.sort("created", DESCENDING)
    item_count = docs.count()

    # "page" comes straight from the query string: a non-numeric value
    # must not raise, and a value below 1 would turn into a negative
    # slice on the cursor below.
    try:
        page = int(params.get("page", 1))
    except (TypeError, ValueError):
        page = 1
    page = max(page, 1)
    items_per_page = 10

    first = (page - 1) * items_per_page
    url_maker = PageURL_WebOb(request)
    docs = Page(
        list(docs[first:first + items_per_page]),
        url=url_maker,
        page=page,
        items_per_page=items_per_page,
        item_count=item_count,
        presliced_list=True,
    )

    distinct_keywords = request.db.docs.distinct("keywords")
    distinct_keywords.sort()
    return {"docs": docs, "distinct_keywords": distinct_keywords}
示例#4
0
文件: db.py 项目: do3cc/Scanned-Docs
    def reindex(self):
        '''
        Rebuild and store the document's search terms.

        The title plus every field named in the document's optional
        ``fulltext_fields`` list are joined with spaces, run through
        ``index`` with this object's accepted languages, and the result
        is written to ``search_terms`` before the document is saved.
        '''
        pieces = [self.doc['title']]
        pieces.extend(self.doc[name]
                      for name in self.doc.get('fulltext_fields', []))
        terms = index(' '.join(pieces),
                      accepted_languages=self.accepted_languages)
        self.doc['search_terms'] = list(terms)
        self.db.docs.save(self.doc)
示例#5
0
文件: db.py 项目: do3cc/Scanned-Docs
    def reindex(self):
        '''
        Rebuild and store the document's search terms.

        The title plus every field named in the document's optional
        ``fulltext_fields`` list are joined with spaces, run through
        ``index`` with this object's accepted languages, and the result
        is written to ``search_terms`` before the document is saved.
        '''
        pieces = [self.doc['title']]
        pieces.extend(self.doc[name]
                      for name in self.doc.get('fulltext_fields', []))
        terms = index(' '.join(pieces),
                      accepted_languages=self.accepted_languages)
        self.doc['search_terms'] = list(terms)
        self.db.docs.save(self.doc)
示例#6
0
def edit_post(request):
    """Apply the submitted edit form to a document and redirect back.

    The document is looked up by the ``id`` route match. ``title``,
    ``created`` (parsed as ``%Y-%m-%dT%H:%MZ``) and ``keywords``
    (whitespace separated) are overwritten from the request parameters.
    A non-empty ``description`` contributes additional search terms.

    :returns: ``HTTPFound`` redirecting to the document's ``edit`` route.
    """
    doc = request.db.docs.find({"_id": ObjectId(request.matchdict["id"])})[0]
    doc["title"] = request.params["title"]
    doc["created"] = datetime.strptime(request.params["created"],
                                       "%Y-%m-%dT%H:%MZ")
    description = request.params["description"]
    if description:
        # Only append terms that are not stored yet; otherwise every save
        # of the form would add the same description terms again.
        known_terms = set(doc["search_terms"])
        doc["search_terms"] += [term for term in index(description)
                                if term not in known_terms]
    doc["keywords"] = request.params["keywords"].split()
    request.db.docs.update({"_id": doc["_id"]}, doc)

    url = request.route_url("edit", id=doc["_id"])
    request.session.flash("Changes saved", "success")
    return HTTPFound(location=url)
示例#7
0
def edit_post(request):
    """Apply the submitted edit form to a document and redirect back.

    The document is looked up by the ``id`` route match. ``title``,
    ``created`` (parsed as ``%Y-%m-%dT%H:%MZ``) and ``keywords``
    (whitespace separated) are overwritten from the request parameters.
    A non-empty ``description`` contributes additional search terms.

    :returns: ``HTTPFound`` redirecting to the document's ``edit`` route.
    """
    doc = request.db.docs.find({"_id": ObjectId(request.matchdict["id"])})[0]
    doc["title"] = request.params["title"]
    doc["created"] = datetime.strptime(request.params["created"],
                                       "%Y-%m-%dT%H:%MZ")
    description = request.params["description"]
    if description:
        # Only append terms that are not stored yet; otherwise every save
        # of the form would add the same description terms again.
        known_terms = set(doc["search_terms"])
        doc["search_terms"] += [term for term in index(description)
                                if term not in known_terms]
    doc["keywords"] = request.params["keywords"].split()
    request.db.docs.update({"_id": doc["_id"]}, doc)

    url = request.route_url("edit", id=doc["_id"])
    request.session.flash("Changes saved", "success")
    return HTTPFound(location=url)
示例#8
0
    def collection_get(self):
        """Return one page (two items) of documents as jsonable dicts.

        ``page`` is read from the validated request data and is 0-based.
        A negative page counts from the end: ``-1`` is the last page that
        actually holds documents, ``-2`` the one before it, and so on.
        Values reaching back past the first page are clamped to page 0.
        An optional ``filter`` request parameter restricts the result to
        documents whose ``search_terms`` contain one of its indexed terms.
        Results are sorted by ``created`` ascending.
        """
        limit = 2
        page = self.request.validated.get('page', 0)

        if "filter" in self.request.params:
            keys = list(index(self.request.params["filter"]))
            spec = {"search_terms": {"$in": keys}}
        else:
            spec = {}
        if page < 0:
            # Ceiling division gives the real number of pages. The former
            # "floor + 1" counted one page too many whenever the total was
            # an exact multiple of ``limit``, so page -1 came back empty.
            pages = (self.db.count(spec) + limit - 1) // limit
            # Never let the skip offset go negative.
            page = max(pages + page, 0)
        return [
            x.to_jsonable_dict() for x in self.db.find(
                spec, limit=limit, skip=page * limit, sort=[('created', 1)])
        ]
示例#9
0
    def collection_get(self):
        """Return one page (two items) of documents as jsonable dicts.

        ``page`` is read from the validated request data and is 0-based.
        A negative page counts from the end: ``-1`` is the last page that
        actually holds documents, ``-2`` the one before it, and so on.
        Values reaching back past the first page are clamped to page 0.
        An optional ``filter`` request parameter restricts the result to
        documents whose ``search_terms`` contain one of its indexed terms.
        Results are sorted by ``created`` ascending.
        """
        limit = 2
        page = self.request.validated.get('page', 0)

        if "filter" in self.request.params:
            keys = list(index(self.request.params["filter"]))
            spec = {"search_terms": {"$in": keys}}
        else:
            spec = {}
        if page < 0:
            # Ceiling division gives the real number of pages. The former
            # "floor + 1" counted one page too many whenever the total was
            # an exact multiple of ``limit``, so page -1 came back empty.
            pages = (self.db.count(spec) + limit - 1) // limit
            # Never let the skip offset go negative.
            page = max(pages + page, 0)
        return [
            x.to_jsonable_dict() for x in self.db.find(
                spec, limit=limit, skip=page * limit, sort=[('created', 1)])
        ]
示例#10
0
def add(request):
    """Store every image of an uploaded file as a searchable document.

    Request parameters read: ``title`` and ``file`` (required), plus the
    optional ``created``, ``description`` and ``force_detection``
    (anything but the string ``true``, case-insensitively, disables it;
    the default is enabled).

    Each image extracted from the upload is passed to ``recognize``; the
    returned text, together with the description and the title, is
    indexed in the detected language and the image is inserted with a
    thumbnail as a version 4 document.

    :returns: ``HTTPServerError`` as soon as ``recognize`` raises
        ``TypeError`` for an image; images handled before that one have
        already been inserted.
    """
    title = request.params['title']
    # A missing or empty "created" parameter falls back to the current
    # UTC time.
    created = request.params.get('created') or datetime.utcnow()
    description = request.params.get('description', '')
    force_detection = request.params.get("force_detection",
                                         'true').lower() == 'true'
    accepted_languages = request.registry.settings['accepted_languages']

    imgstream = request.params['file'].file

    doc_list = request.db.docs

    for image in get_images_from_stream(imgstream):
        try:
            lang, _img, text = recognize(image, accepted_languages,
                                         force_detection)
        except TypeError as e:
            err_msg = "Error: " + str(e)
            request.session.flash(err_msg, 'failure')
            return HTTPServerError(explanation=err_msg,
                                   detail='Go back, unselect force '
                                          'detection, and try again')
        text += " " + description + " " + title
        # The joined string always contains the two separators, so test
        # the stripped text to find out whether there is anything to index.
        if text.strip():
            search_terms = list(index(text, [lang]))
        else:
            # Keep the stored type consistent: always a list of terms.
            search_terms = []
        thumb = get_thumbnail(image)

        doc_list.insert({
            'img': Binary(image),
            'thumb': Binary(thumb),
            'created': created,
            'version': 4,
            'forced_detection': force_detection,
            'language': lang,
            'keywords': [],
            'search_terms': search_terms,
            'title': title
        })
示例#11
0
文件: add.py 项目: do3cc/Scanned-Docs
def add(request):
    """Store every image of an uploaded file as a searchable document.

    Request parameters read: ``title`` and ``file`` (required), plus the
    optional ``created``, ``description`` and ``force_detection``
    (anything but the string ``true``, case-insensitively, disables it;
    the default is enabled).

    Each image extracted from the upload is passed to ``recognize``; the
    returned text, together with the description and the title, is
    indexed in the detected language and the image is inserted with a
    thumbnail as a version 4 document.

    :returns: ``HTTPServerError`` as soon as ``recognize`` raises
        ``TypeError`` for an image; images handled before that one have
        already been inserted.
    """
    title = request.params['title']
    # A missing or empty "created" parameter falls back to the current
    # UTC time.
    created = request.params.get('created') or datetime.utcnow()
    description = request.params.get('description', '')
    force_detection = request.params.get("force_detection",
                                         'true').lower() == 'true'
    accepted_languages = request.registry.settings['accepted_languages']

    imgstream = request.params['file'].file

    doc_list = request.db.docs

    for image in get_images_from_stream(imgstream):
        try:
            lang, _img, text = recognize(image, accepted_languages,
                                         force_detection)
        except TypeError as e:
            err_msg = "Error: " + str(e)
            request.session.flash(err_msg, 'failure')
            return HTTPServerError(explanation=err_msg,
                                   detail='Go back, unselect force '
                                          'detection, and try again')
        text += " " + description + " " + title
        # The joined string always contains the two separators, so test
        # the stripped text to find out whether there is anything to index.
        if text.strip():
            search_terms = list(index(text, [lang]))
        else:
            # Keep the stored type consistent: always a list of terms.
            search_terms = []
        thumb = get_thumbnail(image)

        doc_list.insert({'img': Binary(image),
                         'thumb': Binary(thumb),
                         'created': created,
                         'version': 4,
                         'forced_detection': force_detection,
                         'language': lang,
                         'keywords': [],
                         'search_terms': search_terms,
                         'title': title})
示例#12
0
def test_index():
    """Check the terms ``index.index`` produces for an English sentence."""
    from lembrar import index as index_module
    expected = set([u'these', u'are', u'some', u'test'])
    assert expected == index_module.index("These are some tests", ['en'])