def upgrade(request):
    """Migrate every document in the ``docs`` collection through schema
    versions 1 -> 5, one version per pass.

    Each pass re-queries the collection by version number, so a document
    bumped by an earlier pass is picked up by the next pass within the
    same request.  Returns ``{"success": 1}`` unconditionally.
    """
    doc_list = request.db.docs
    # v0 -> v1: stamp an initial version and a creation timestamp on
    # documents that predate versioning.
    for doc in doc_list.find():
        if "version" not in doc:
            doc["version"] = 1
            doc["created"] = datetime.utcnow()
            doc_list.save(doc)
    # v1 -> v2: replace the raw "searchable_text" field with indexed
    # search terms plus a detected language.  Documents without the raw
    # text are bumped straight to v2 with no terms.
    for doc in doc_list.find({"version": 1}):
        if "searchable_text" not in doc.keys():
            doc["version"] = 2
            doc_list.save(doc)
            continue
        searchable_text = doc.pop("searchable_text")
        lang = guessLanguage(searchable_text)
        search_terms = index(searchable_text + " " + doc["title"], [lang])
        doc["search_terms"] = search_terms
        doc["language"] = lang
        doc["version"] = 2
        doc_list.save(doc)
    # v2 -> v3: normalise all search terms to lower case.
    for doc in doc_list.find({"version": 2}):
        doc["version"] = 3
        doc["search_terms"] = [x.lower() for x in doc["search_terms"]]
        doc_list.save(doc)
    # v3 -> v4: introduce the (initially empty) keyword list.
    for doc in doc_list.find({"version": 3}):
        doc["version"] = 4
        doc["keywords"] = []
        doc_list.save(doc)
    # v4 -> v5: flag pre-existing documents as already scanned.
    for doc in doc_list.find({"version": 4}):
        doc["version"] = 5
        doc["already_scanned"] = True
        doc_list.save(doc)
    return {"success": 1}
def list_view(request):
    """Render a paginated document listing.

    Optional request parameters:
    - ``filter``: free text, indexed and matched against ``search_terms``
    - ``keyword``: exact keyword match against ``keywords``
    - ``page``: 1-based page number (defaults to 1)
    """
    params = request.params
    spec = {}
    if "filter" in params:
        spec["search_terms"] = {"$in": list(index(params["filter"]))}
    if "keyword" in params:
        spec["keywords"] = {"$in": [params["keyword"]]}

    cursor = request.db.docs.find(spec=spec)
    cursor.sort("created", DESCENDING)
    total = cursor.count()

    page = int(params.get("page", 1))
    per_page = 10
    start = (page - 1) * per_page
    docs = Page(
        list(cursor[start:start + per_page]),
        url=PageURL_WebOb(request),
        page=page,
        items_per_page=per_page,
        item_count=total,
        presliced_list=True,
    )

    distinct_keywords = request.db.docs.distinct("keywords")
    distinct_keywords.sort()
    return {"docs": docs, "distinct_keywords": distinct_keywords}
def reindex(self):
    """Rebuild this document's search terms from its title plus every
    configured full-text field, then persist the document."""
    parts = [self.doc['title']]
    parts.extend(self.doc[name]
                 for name in self.doc.get('fulltext_fields', []))
    terms = index(' '.join(parts),
                  accepted_languages=self.accepted_languages)
    self.doc['search_terms'] = list(terms)
    self.db.docs.save(self.doc)
def reindex(self):
    """Recompute the document's search index (title + full-text fields)
    and save the result back to the ``docs`` collection."""
    fields = self.doc.get('fulltext_fields', [])
    corpus = ' '.join([self.doc['title']] + [self.doc[f] for f in fields])
    self.doc['search_terms'] = list(
        index(corpus, accepted_languages=self.accepted_languages))
    self.db.docs.save(self.doc)
def edit_post(request):
    """Apply the submitted edit form to a document, then redirect back
    to the edit page with a success flash message."""
    params = request.params
    doc = request.db.docs.find(
        {"_id": ObjectId(request.matchdict["id"])})[0]
    doc["title"] = params["title"]
    doc["created"] = datetime.strptime(params["created"],
                                       "%Y-%m-%dT%H:%MZ")
    description = params["description"]
    if description:
        # New description terms are appended to the existing index
        # rather than replacing it.
        doc["search_terms"] += index(params["description"])
    doc["keywords"] = params["keywords"].split()
    request.db.docs.update({"_id": doc["_id"]}, doc)
    request.session.flash("Changes saved", "success")
    return HTTPFound(location=request.route_url("edit", id=doc["_id"]))
def collection_get(self):
    """Return one page of documents as JSON-able dicts.

    Optional ``filter`` parameter restricts results via the search
    index; validated ``page`` selects the page (0-based).  A negative
    page counts from the end: -1 is the last page, -2 the one before it.
    """
    limit = 2
    page = self.request.validated.get('page', 0)
    if "filter" in self.request.params:
        keys = list(index(self.request.params["filter"]))
        spec = {"search_terms": {"$in": keys}}
    else:
        spec = {}
    if page < 0:
        # BUG FIX: the page count must be the ceiling of count/limit.
        # The old floor division (int(count / limit)) mapped page=-1
        # one past the last page whenever count was an exact multiple
        # of limit, yielding an empty result.
        count = self.db.count(spec)
        num_pages = -(-count // limit)  # ceiling division
        page = max(num_pages + page, 0)
    return [x.to_jsonable_dict()
            for x in self.db.find(spec, limit=limit, skip=page * limit,
                                  sort=[('created', 1)])]
def collection_get(self):
    """Return one page of documents as JSON-able dicts.

    Optional ``filter`` parameter restricts results via the search
    index; validated ``page`` selects the page (0-based).  A negative
    page counts from the end: -1 is the last page, -2 the one before it.
    """
    limit = 2
    page = self.request.validated.get('page', 0)
    if "filter" in self.request.params:
        keys = list(index(self.request.params["filter"]))
        spec = {"search_terms": {"$in": keys}}
    else:
        spec = {}
    if page < 0:
        # BUG FIX: the page count must be the ceiling of count/limit.
        # The old floor division (int(count / limit)) mapped page=-1
        # one past the last page whenever count was an exact multiple
        # of limit, yielding an empty result.
        count = self.db.count(spec)
        num_pages = -(-count // limit)  # ceiling division
        page = max(num_pages + page, 0)
    return [
        x.to_jsonable_dict()
        for x in self.db.find(
            spec, limit=limit, skip=page * limit, sort=[('created', 1)])
    ]
def add(request):
    """Create one document per image in the uploaded file.

    Each image is OCR-recognised; the recognised text, the optional
    description and the title are indexed into ``search_terms``.  On a
    recognition failure the error is flashed and an HTTPServerError is
    returned immediately (remaining images are not processed).
    """
    title = request.params['title']
    created = request.params.get('created', datetime.utcnow()) or \
        datetime.utcnow()
    description = request.params.get('description', '')
    force_detection = \
        request.params.get("force_detection", 'true').lower() == 'true'
    accepted_languages = request.registry.settings['accepted_languages']
    imgstream = request.params['file'].file
    doc_list = request.db.docs
    for image in get_images_from_stream(imgstream):
        try:
            lang, img, text = recognize(image, accepted_languages,
                                        force_detection)
        except TypeError as e:  # `as` form: valid on Python 2.6+ and 3
            err_msg = "Error: " + str(e)
            request.session.flash(err_msg, 'failure')
            return HTTPServerError(explanation=err_msg,
                                   detail='Go back, unselect force '
                                          'detection, and try again')
        text += " " + description + " " + title
        if text:
            search_terms = list(index(text, [lang]))
        else:
            # BUG FIX: was '' (a str); every other code path stores a
            # list here, and later migrations iterate over the terms.
            search_terms = []
        thumb = get_thumbnail(image)
        # NOTE(review): inserted at version 4 while upgrade() migrates
        # to 5 — presumably so the next upgrade marks these scanned;
        # confirm this is intended.
        doc_list.insert({
            'img': Binary(image),
            'thumb': Binary(thumb),
            'created': created,
            'version': 4,
            'forced_detection': force_detection,
            'language': lang,
            'keywords': [],
            'search_terms': search_terms,
            'title': title
        })
def add(request):
    """Create one document per image in the uploaded file.

    Each image is OCR-recognised; the recognised text, the optional
    description and the title are indexed into ``search_terms``.  On a
    recognition failure the error is flashed and an HTTPServerError is
    returned immediately (remaining images are not processed).
    """
    title = request.params['title']
    created = request.params.get('created', datetime.utcnow()) or \
        datetime.utcnow()
    description = request.params.get('description', '')
    force_detection = \
        request.params.get("force_detection", 'true').lower() == 'true'
    accepted_languages = request.registry.settings['accepted_languages']
    imgstream = request.params['file'].file
    doc_list = request.db.docs
    for image in get_images_from_stream(imgstream):
        try:
            lang, img, text = recognize(image, accepted_languages,
                                        force_detection)
        except TypeError as e:  # `as` form: valid on Python 2.6+ and 3
            err_msg = "Error: " + str(e)
            request.session.flash(err_msg, 'failure')
            return HTTPServerError(explanation=err_msg,
                                   detail='Go back, unselect force '
                                          'detection, and try again')
        text += " " + description + " " + title
        if text:
            search_terms = list(index(text, [lang]))
        else:
            # BUG FIX: was '' (a str); every other code path stores a
            # list here, and later migrations iterate over the terms.
            search_terms = []
        thumb = get_thumbnail(image)
        doc_list.insert({'img': Binary(image),
                         'thumb': Binary(thumb),
                         'created': created,
                         'version': 4,
                         'forced_detection': force_detection,
                         'language': lang,
                         'keywords': [],
                         'search_terms': search_terms,
                         'title': title})
def test_index():
    """The indexer stems English words and returns them as a set."""
    from lembrar import index
    expected = set([u'these', u'are', u'some', u'test'])
    result = index.index("These are some tests", ['en'])
    assert result == expected