def update_all(self):
    """Reindex every titled, provider-backed Course into its per-language Solr core."""
    self.stdout.write("Updating all Courses in Solr")
    indexed = 0  # running document counter, shared across all language cores
    for lang in sorted(set(LANG_MAPPING.values())):
        core_url = settings.SOLR_URL % lang
        solr = pysolarized.Solr(core_url)
        self.stdout.write("Adding %s to %s" % (lang, core_url))
        courses = Course.objects.filter(language__iexact=lang,
                                        provider__isnull=False)
        for course in courses:
            # Skip untitled courses — nothing useful to index.
            if not course.title:
                continue
            solr.add({
                'id': course.linkhash,
                'title': course.title,
                'description': course.description,
                'link': course.linkurl,
                'source': course.provider.name,
                'language': course.language,
                'is_member': True,
            })
            # Flush the pending batch every 200 documents so it never
            # grows unbounded.
            if indexed % 200 == 0:
                solr.commit()
            indexed += 1
        # Final commit + optimize for this language core.
        solr.commit()
        solr.optimize()
def delete_all(self):
    """Remove every document from each per-language Solr core."""
    self.stdout.write("Deleting all Courses in Solr")
    for lang in sorted(set(LANG_MAPPING.values())):
        core_url = settings.SOLR_URL % lang
        backend = pysolarized.Solr(core_url)
        self.stdout.write("Deleting %s at %s" % (lang, core_url))
        # deleteAll() only queues the deletion; commit makes it visible.
        backend.deleteAll()
        backend.commit()
def index(request):
    """Render the search page, running a Solr query when ``q`` is supplied.

    GET parameters:
        q    -- query string (optional; without it only the type list renders)
        tip  -- zero or more result-type filters, validated against
                ``search_register_by_name``
        page -- 1-based result page number

    Fixes: replaced deprecated ``has_key()`` with ``in``; removed a duplicate
    ``context["results"]`` assignment in the success branch.
    """
    context = {
        'tipi': [{'value': k, 'name': v._meta.verbose_name_plural}
                 for k, v in search_register_by_name.iteritems()]
    }

    if 'q' in request.GET:
        context["query"] = query = request.GET["q"]
        context['qfilter'] = qfilter = request.GET.getlist('tip')

        # Build a Solr filter query restricted to known, registered types.
        filterquery = None
        if qfilter:
            qfilter = [i for i in qfilter if i in search_register_by_name]
            filterquery = {'tip': '(%s)' % (' OR '.join(qfilter), )}

        # Check for current page.
        # We start counting from 1 otherwise page = 0 confuses template ifs.
        page = 1
        if 'page' in request.GET:
            try:
                page = max(1, int(request.GET["page"]))
            except ValueError:
                page = 1

        # Do search
        solr = pysolarized.Solr(settings.SOLR_URL)
        results = solr.query(query.encode('raw_unicode_escape'),
                             sort=["score desc, datum_timestamp desc"],
                             filters=filterquery,
                             start=(page - 1) * RESULTS_PAGE_SIZE,
                             rows=RESULTS_PAGE_SIZE)

        # Parse search results
        if not results or results.results_count == 0:
            context["results"] = None
        else:
            context['results'] = results.documents
            # Figure out pagination
            if results.results_count > RESULTS_PAGE_SIZE:
                if results.start_index > 0:
                    context["prev_page"] = max(1, page - 1)
                # Page + 1 since we're counting from 0
                if page * RESULTS_PAGE_SIZE < results.results_count:
                    context["next_page"] = page + 1
            context['highlights'] = results.highlights

    return render(request, 'search.html', context)
def do_solr_import(self):
    """Rebuild the Solr index from scratch: people, parties and transcripts.

    Wipes the index, then adds one document per Oseba (person), per
    Stranka (party) and per Zapis (transcript entry), committing once at
    the end.

    Fix: removed a stray ``solr.add(doc)`` after the transcript loops
    that re-added the stale last party document a second time.
    """
    solr = pysolarized.Solr(settings.SOLR_URL)
    solr.deleteAll()

    # People.
    for oseba in Oseba.objects.all():
        doc = {
            "id": "os_%s" % (oseba.id, ),
            "id_db": oseba.id,
            "tip": "oseba",
            "ime": "%s %s" % (oseba.ime, oseba.priimek, ),
            "str_slug": oseba.slug,
        }
        # Optional fields are only added when present.
        if oseba.twitter:
            doc["str_twitter"] = oseba.twitter
        if oseba.facebook:
            doc["str_facebook"] = oseba.facebook
        if oseba.rojstni_dan:
            doc["datum_rojstva"] = pysolarized.to_solr_date(
                oseba.rojstni_dan)
        solr.add(doc)

    # Parties.
    for stranka in Stranka.objects.all():
        doc = {
            "id": "st_%s" % (stranka.id, ),
            "id_db": stranka.id,
            "tip": "stranka",
            "ime": stranka.ime,
            "str_okrajsava": stranka.okrajsava,
        }
        if stranka.od:
            doc["datum_od"] = pysolarized.to_solr_date(stranka.od)
        # END_OF_TIME marks a party that still exists — no end date then.
        if stranka.do != END_OF_TIME:
            doc["datum_do"] = pysolarized.to_solr_date(stranka.do)
        solr.add(doc)

    # Session transcripts.
    for seja in Seja.objects.select_related().all():
        for zasedanje in seja.zasedanje_set.select_related().all():
            for zapis in zasedanje.zapis_set.select_related().all():
                zapis_doc = {
                    "id": "zap_%s" % (zapis.id, ),
                    "id_db": zapis.id,
                    "tip": "zapis",
                    "vsebina": zapis.odstavki,
                    "str_ime_seje": seja.naslov,
                    "str_permalink": zapis.permalink,
                    "str_seq": zapis.seq,
                }
                if zapis.govorec:
                    zapis_doc["id_oseba"] = zapis.govorec_oseba_id
                if zapis.govorec_oseba:
                    zapis_doc["txt_govorec"] = "%s %s" % (
                        zapis.govorec_oseba.ime,
                        zapis.govorec_oseba.priimek, )
                    zapis_doc["str_govorec_slug"] = zapis.govorec_oseba.slug
                zapis_doc["id_zasedanje"] = zapis.zasedanje_id
                zapis_doc["id_seja"] = zapis.zasedanje.seja_id
                if zapis.datum:
                    zapis_doc["datum_zapisa"] = pysolarized.to_solr_date(
                        zapis.datum)
                solr.add(zapis_doc)

    solr.commit()
def search(request):
    """JSON search endpoint.

    Use the ``q`` parameter to specify the query string and ``legacy=1``
    to use the old Solr-backed search; otherwise the MERLOT API is queried.
    (This text previously sat as a no-op string expression after the helper
    definitions; moved here so it is the actual docstring.)
    """

    def _build_course_doc(course):
        # Flatten a Course instance into a JSON-serializable dict.
        if course.source:
            source = course.source.provider.name
        elif course.author_organization:
            source = course.author_organization
        # NOTE: a branch testing `not course.author and
        # course.author_organization` was removed — it was unreachable
        # (the elif above already catches any truthy author_organization).
        elif course.author:
            source = course.author
        else:
            source = ''

        provider_id = ''
        if course.source:
            provider_id = course.source.provider.id

        # Category paths like "All/Science/Biology".
        cat_tree = []
        for cat in course.merlot_categories.all():
            cat_tree.append('/'.join(
                ['All'] + map(unicode, cat.get_ancestors()) + [cat.name]))

        if course.merlot_languages.exists():
            language = ','.join(
                [lang.name for lang in course.merlot_languages.all()])
        else:
            language = course.language

        return {
            'description': course.description,
            'language': language,
            'title': course.title,
            'is_member': bool(course.provider),
            'source': source,
            'link': course.linkurl,
            'id': course.linkhash,
            'author': course.author or '',
            'author_organization': course.author_organization,
            'oec_provider_id': provider_id,
            'categories': cat_tree,
            'merlot_id': course.merlot_id,
        }

    def encode_obj(in_obj):
        """Recursively UTF-8-encode unicode values.

        http://stackoverflow.com/a/26568590/141200
        """
        def encode_list(in_list):
            out_list = []
            for el in in_list:
                out_list.append(encode_obj(el))
            return out_list

        def encode_dict(in_dict):
            out_dict = {}
            for k, v in in_dict.iteritems():
                out_dict[k] = encode_obj(v)
            return out_dict

        if isinstance(in_obj, unicode):
            return in_obj.encode('utf-8')
        elif isinstance(in_obj, list):
            return encode_list(in_obj)
        elif isinstance(in_obj, tuple):
            return tuple(encode_list(in_obj))
        elif isinstance(in_obj, dict):
            return encode_dict(in_obj)
        return in_obj

    def _update_metadata(material):
        # Create or refresh a Course from a MERLOT <material> XML element.
        url = material.find('URL').text
        try:
            course = Course.objects.get(
                linkhash=Course.calculate_linkhash(url))
            if course.merlot_synced:
                # Already synced — skip the expensive re-import.
                return course
        except Course.DoesNotExist:
            course = Course()

        try:
            photo_url = material.find('photoURL').text
        except AttributeError:
            photo_url = ''

        course_data = {
            'linkurl': url,
            'title': material.find('title').text,
            'merlot_id': material.find('materialid').text,
            'description': material.find('description').text,
            'author': material.find('authorName').text or '',
            'author_organization': material.find('authorOrg').text or '',
            'image_url': photo_url,
            'merlot_xml': ET.tostring(material, encoding='utf-8'),
            'merlot_synced_date': datetime.datetime.now(),
            'merlot_synced': True,
        }

        # Derive Creative Commons flags from the license slug
        # (e.g. "cc-by-nc-sa").
        course_data['creative_commons_commercial'] = 'Unsure'
        creativecommons = material.find('creativecommons').text
        if 'cc-' in creativecommons:
            course_data['creative_commons'] = 'Yes'
            if 'nc' in creativecommons:
                course_data['creative_commons_commercial'] = 'No'
            if 'sa' in creativecommons:
                course_data['creative_commons_derivatives'] = 'Sa'
            elif 'nd' in creativecommons:
                course_data['creative_commons_derivatives'] = 'No'
            else:
                course_data['creative_commons_derivatives'] = 'Yes'
        # (a dead `creativecommons = 'No'` local assignment was removed here)

        # Attach a known Source/provider when the course URL's domain
        # matches a registered source.
        course_domain = urlsplit(url).netloc
        if Source.objects.filter(url__icontains=course_domain).exists():
            source = Source.objects.filter(url__icontains=course_domain)[0]
            course.source = source
            course.provider = source.provider

        for k, v in course_data.items():
            setattr(course, k, v)
        course.save()

        for category in material.find('categories').findall('category'):
            # The category id is the query-string value of the href.
            category_id = category.attrib.get('href').split('=')[1]
            course.merlot_categories.add(
                MerlotCategory.objects.get(merlot_id=category_id))

        for language_short in material.find('languages').findall('language'):
            if language_short.text not in MERLOT_LANGUAGE_SHORT:
                continue
            language = MERLOT_LANGUAGE_SHORT[language_short.text]
            merlot_language, is_created = MerlotLanguage.objects.get_or_create(
                name=language)
            course.merlot_languages.add(merlot_language)
        course.save()
        return course

    def _merlot_search(params):
        # Query the MERLOT REST API; returns (documents, total_count).
        if settings.DEBUG:
            requests_cache.install_cache('merlot')
        parser = etree.XMLParser(recover=True)
        r = requests.get(settings.MERLOT_API_URL + '/materialsAdvanced.rest',
                         params=params)
        tree = ET.fromstring(r.content, parser=parser)
        try:
            num_results = int(tree.find('nummaterialstotal').text)
        except AttributeError:
            num_results = 0
        documents = []
        if num_results > 0:
            for material in tree.findall('material'):
                course = _update_metadata(material)
                documents.append(_build_course_doc(course))
        return documents, num_results

    if request.GET.get('q') and request.GET.get('legacy', '0') != '0':
        # Legacy Solr-backed search.
        q = request.GET.get('q')
        SOLR_URL = settings.SOLR_URL % 'default'
        solr = pysolarized.Solr(SOLR_URL)
        solr_kwargs = {'start': 0, 'rows': 10}
        # TODO(review): int() raises ValueError on a malformed `page`.
        page = int(request.GET.get('page', 1))
        if page:
            solr_kwargs['start'] = (page - 1) * 10
        results = solr.query(q, **solr_kwargs)
        if results:
            documents = []
            for solr_doc in results.documents:
                course = Course.objects.get(linkhash=solr_doc['id'])
                documents.append(_build_course_doc(course))
            response = {
                'page': page,
                'count': results.results_count,
                'documents': documents,
            }
            if results.results_count > page * 10:
                response['next_page'] = urlencode(
                    encode_obj({'page': page + 1, 'legacy': 'on', 'q': q}))
            if page > 1:
                response['previous_page'] = urlencode(
                    encode_obj({'page': page - 1, 'legacy': 'on', 'q': q}))
        else:
            response = {'error': 'Search is currently not available'}
    elif request.GET.get('q'):
        # MERLOT-backed search.
        page = int(request.GET.get('page', 1))
        q = request.GET.get('q')
        params = {
            'licenseKey': settings.MERLOT_KEY,
            'page': page,
            'keywords': q,
            'creativeCommons': 1,
            'sort.property': 'overallRating',
            'materialType': ['Online Course', 'Open Textbook'],
        }
        data, count = _merlot_search(params)
        response = {'page': page, 'count': count, 'documents': data}
        if count > page * 10:
            response['next_page'] = urlencode(
                encode_obj({'page': page + 1, 'q': q}))
        if page > 1:
            response['previous_page'] = urlencode(
                encode_obj({'page': page - 1, 'q': q}))
    else:
        response = {'error': 'Please use q parameter for search'}

    return HttpResponse(json.dumps(response),
                        content_type="application/json")
def get_solr_backend():
    """Return a Solr client bound to the configured endpoint."""
    return pysolarized.Solr(settings.SOLR_URL)