def main(args):
    """Index headline JSON files from ``dataDir`` into the ``newsdai`` Solr core.

    For each ``*.json`` file in ``dataDir``, every document gets an ``SVO``
    field extracted from its headline (when extraction succeeds) and is added
    to Solr with an immediate commit.

    NOTE(review): relies on module-level ``solrUrl``, ``dataDir`` and
    ``Article`` — confirm they are defined at import time.
    """
    s = solr.SolrConnection('http://' + solrUrl + '/solr/newsdai')
    json_files = [name for name in os.listdir(dataDir) if name.endswith('.json')]

    # Minimal stand-in options object expected by Article's constructor.
    class Options:
        dataDir = '..'

    a = Article("", Options())
    icnt = 0
    for ii, js in enumerate(json_files):
        if ii % 21 == 0:
            print('processing {}'.format(js))
        with open(os.path.join(dataDir, js)) as json_file:
            json_txt = json.load(json_file)
        for doc in json_txt:
            try:
                svo = a.getSVO(doc['Headline'])
                if svo and len(svo) > 1:
                    doc['SVO'] = svo[0]
                s.add(**doc, commit=True)
            except Exception:
                # Bug fix: was a bare ``except:``, which also swallowed
                # SystemExit/KeyboardInterrupt. Keep the traceback and move on.
                traceback.print_exc()
            if icnt % 1001 == 0:
                print('adding {} doc'.format(icnt), end='\r', flush=True)
            icnt += 1
class searchManager:
    """Small demo wrapper around a local Solr index."""

    # Shared connection for all instances (created at import time).
    connection = solr.SolrConnection('http://localhost:8983/solr')

    def addDocuments(self):
        """Add a single demo document to the index (no commit)."""
        # The original built a 'Lucene in Action' dict first and then
        # immediately discarded it; only this document is indexed.
        doc = {"id": 34567, "title": "Mbuoe"}
        searchManager.connection.add(doc)
        # searchManager.connection.add(doc, commit=True)

    def searchDocuments(self, keyword):
        """Search the ``title`` field for *keyword* and print each hit's title.

        Bug fix: the keyword was previously passed as the second positional
        argument of ``query`` (the ``fields`` parameter), so the query was
        just the literal string 'title:'.
        """
        response = searchManager.connection.query('title:' + keyword)
        for hit in response.results:
            print(hit['title'])
def main(argv):
    """Parse CLI options and rebuild the chunk output file from Solr.

    Required options: -f/--chunkFile, -s/--solrUrl, -o/--outputFile.
    Exits with usage text when any option is missing or malformed.
    """
    usage = 'check_failed.py -f <chunk file> -s <solr url> -o <output file> '
    chunkFile = solrUrl = outputFile = None
    try:
        opts, args = getopt.getopt(
            argv, "hf:s:o:", ["chunkFile=", "solrUrl=", "outputFile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-f", "--chunkFile"):
            chunkFile = arg
        elif opt in ("-s", "--solrUrl"):
            solrUrl = arg
        elif opt in ("-o", "--outputFile"):
            outputFile = arg
    if chunkFile is None or solrUrl is None or outputFile is None:
        print(usage)
        sys.exit()
    print("Chunk File : [" + str(chunkFile) + "]")
    print("Solr URL : [" + str(solrUrl) + "]")
    print("Output File : [" + str(outputFile) + "]")
    s = solr.SolrConnection(solrUrl)
    buildChunkFile(outputFile, chunkFile, s)
def solr_add(**data_dict):
    """Add one document to the product Solr index.

    The special ``__commit__`` key (default True) controls whether the
    addition is committed immediately; it is stripped before indexing.
    """
    do_commit = data_dict.pop('__commit__', True)
    conn = solr.SolrConnection(settings.PRODUCT_SOLR)
    conn.add(**data_dict)
    if do_commit:
        conn.commit()
    conn.close()
def searchApiSolr(URLToSolr,pathToData,search_string,psc_pattern,limit=ppApiConfig.LIMIT_NUM_MATCHING_TRANSACTIONS): # create a connection to a solr server solrCon = solr.SolrConnection(URLToSolr) localTransactionDir = None timeSearchBegin = time.clock() transaction = None print "Not using cache." logger.info("Not using cache.") localTransactionDir = Transaction.TransactionDirector() t1 = time.clock() logger.info("Searching for search_string,psc" + search_string+","+psc_pattern) logger.error("Searching for search_string,psc" + search_string+","+psc_pattern) # do a search mainSearch = AGGREGATED_TEXT_FIELD+':'+search_string pscSearch = Transaction.PSC+':'+psc_pattern # the magic happens here... # you can add q_op='AND' here, but it seems to shut down all instances. I'm afraid # I either need to use ediscmax or do something else. print "rows = "+ str(limit) if (psc_pattern == "*"): transactionDicts = solrCon.query(mainSearch,rows=limit,fl='*,score',deftype='edismax') else: transactionDicts = solrCon.query(mainSearch,rows=limit,fq=pscSearch,fl='*,score',deftype='edismax') return processSolrResults(transactionDicts)
def facetResults(facet):
    """Return a dict of ``{facet_value: count}`` for *facet* across all docs.

    Queries Solr with ``rows=0`` so only facet counts (no documents) come
    back. Returns None implicitly if the Solr call fails (error is logged).
    """
    log.debug('facet, `%s`' % facet)
    try:
        s = solr.SolrConnection(settings_app.SOLR_URL)
        params = {
            u'facet': u'true',
            u'facet.field': facet,
            u'rows': u'0',
            u'facet.limit': u'-1',   # -1 == no limit on distinct values
            u'facet.mincount': u'1'  # skip values with zero hits
        }
        q = s.select(u'*:*', **params)
        facet_count_dict = q.facet_counts[u'facet_fields'][facet]
        return facet_count_dict
    except Exception as e:
        # Removed leftover debug statement ``log.error('test')``.
        log.error('in common.facetResults(); exception, %s' % str(e))
def feedback(self):
    """Record feedback for a result (request.params carries ``guid`` and
    ``type`` of "up" or "down").

    NOTE(review): the actual Solr update is not implemented — this only
    opens and closes a connection.
    """
    connection = solr.SolrConnection('http://localhost:8983/solr')
    connection.close()
def get(self, request, *args, **kwargs):
    """Return folio data for a manuscript.

    With an ``image_uri`` kwarg, returns the single matching folio (404 if
    absent); otherwise returns all folios for the manuscript, ordered by
    folio number.
    """
    solrconn = solr.SolrConnection(settings.SOLR_SERVER)
    manuscript_id = kwargs['pk']

    if 'image_uri' not in kwargs:
        # All folios for this manuscript.
        query = u'type:"cantusdata_folio" AND manuscript_id:{0}'.format(
            manuscript_id)
        folio_set = solrconn.query(query, sort="number asc", rows=1000,
                                   fields=FOLIO_FIELDS, score=False)
        return Response(folio_set)

    image_uri = kwargs['image_uri']
    query = u'type:"cantusdata_folio" AND manuscript_id:{0} AND image_uri:"{1}"'\
        .format(manuscript_id, image_uri)
    result = solrconn.query(query, sort="number asc", rows=1,
                            fields=FOLIO_FIELDS, score=False)
    # We only want the single result!
    # TODO: Figure out the best way to handle this
    if result.results:
        return Response(result.results[0])
    raise Http404("No data for a folio with that number")
def search(self):
    """Render search results for the ``query`` request parameter.

    Renders the index page for a blank query, a no-results page when Solr
    returns nothing, and the results page otherwise.
    """
    c.service = "search"
    query = request.params['query']
    # Bug fix: ``dict.has_key`` was removed in Python 3; ``in`` is the
    # equivalent, portable membership test.
    if 'start' in request.params:
        start = request.params['start']
    else:
        start = 0
    if query.strip() == "":
        return render('/index.mako')
    conn = solr.SolrConnection('http://localhost:8983/solr')
    # Grab data from Solr
    params = {'q': sanitize(query), 'rows': 10, 'start': start}
    results = conn.query(**params)
    conn.close()
    if len(results) == 0:
        c.query = query
        return render('/noresults.mako')
    # Send params to context
    c.start = start
    c.results = results
    c.query = query
    return render('/results.mako')
def merge_article_results(self, responses, query):
    """Re-run each term query unbounded and return only the articles that
    match ALL terms (set intersection on ``journal_art_id``).

    :param responses: initial per-term Solr responses (only ``numFound``
        is used, to size the follow-up queries).
    :param query: dict with ``journal`` and ``term1``..``termN`` keys.
    :returns: list of result dicts present in every term's result set.
    """
    if not responses:
        # Bug fix: the original indexed ``keys_sets[0]`` unconditionally
        # and raised IndexError when no responses were supplied.
        return []

    new_responses = []
    for i, r in enumerate(responses):
        s = solr.SolrConnection(self.solr_url)
        s_query = "text:\"%s\" AND journal:\"%s\"" % (
            query['term' + str(i + 1)], query['journal'])
        # Fetch every match for this term (rows == numFound).
        params = {'start': '0', 'rows': str(r.numFound)}
        new_responses.append(s.query(s_query, **params))

    all_results = []
    keys_sets = []
    for rsp in new_responses:
        key_set = set()
        for result in rsp.results:
            all_results.append(result)
            key_set.add(result['journal_art_id'])
        keys_sets.append(key_set)

    # Ids that appear in every term's result set.
    final_keys = set.intersection(*keys_sets)
    return [r for r in all_results if r['journal_art_id'] in final_keys]
def run(filename):
    # Bulk-load article rows from a CSV file into a local Solr index,
    # committing after every 10 000 articles. (Python 2 only: uses
    # xrange, it.next() and itertools.imap.)
    reader = csv.reader(open(filename, 'r'))

    def group(it, size):
        # Lazily split *it* into chunks of *size*, each yielded as a
        # sub-iterator. The outer generator ends when it.next() raises
        # StopIteration (Py2 generator semantics).
        it = iter(it)

        def subit(it, first, size):
            # Yield the chunk's first element, then up to size-1 more.
            yield first
            for i in xrange(size):
                yield it.next()

        while True:
            yield subit(it, it.next(), size - 1)

    s = solr.SolrConnection('http://localhost:8983/solr')

    def quorum(articles):
        # From a (key, group) pair produced by groupby, keep only the
        # first row of the group (deduplication by first column).
        articles = list(articles[1])
        return articles[0]

    def get_article(article):
        # Decode each CSV field to unicode, build the Article record as a
        # dict, and split the comma-separated keywords into a list.
        article = Article(*(f.decode('utf-8') for f in article))._asdict()
        article['keywords'] = article['keywords'].split(u', ')
        return article

    # Collapse consecutive rows sharing the same first column to one row.
    reader = itertools.imap(quorum, itertools.groupby(reader, key=lambda x: x[0]))
    for i, g in enumerate(group(reader, 10000)):
        print "Loading", i
        s.add_many(itertools.imap(get_article, g))
        s.commit()
def data(rows_returned):
    """Run the ``search`` query-string parameter against the linksdgs
    collection and render the hits as JSON.

    :param rows_returned: maximum number of rows to request from Solr.
    """
    query = request.args.get("search")
    collection = 'linksdgs'
    server_url = 'http://solr4-jmmnn-1.c9users.io/solr/' + str(collection)
    connection = solr.SolrConnection(server_url)
    response = connection.select(query, rows=rows_returned)
    return render_template("data.json", response=response)
def solr_delete(sender, instance, created, **kwargs):
    """Django signal handler: remove the Solr record for a goudimel_book
    item, if one is indexed.

    Bug fix: the original indexed ``record.results[0]`` unconditionally and
    raised IndexError when the item was not in the index; now it skips
    quietly (matching the guarded style of the other solr_delete handler).
    """
    from django.conf import settings
    import solr
    solrconn = solr.SolrConnection(settings.SOLR_SERVER)
    record = solrconn.query("type:goudimel_book item_id:{0}".format(
        instance.id))
    if record.results:
        solrconn.delete(record.results[0]['id'])
        solrconn.commit()
def order_solr_add(**data_dict):
    """Add one document to the order Solr index.

    ``__commit__`` is accepted (and stripped from the document) for
    interface parity with ``solr_add``, but explicit commits are
    deliberately disabled here.
    """
    data_dict.pop('__commit__', True)
    conn = solr.SolrConnection(settings.ORDER_SOLR)
    conn.add(**data_dict)
    # Disable explicit commit
    #if commit:
    #    s.commit()
    conn.close()
def run_task(self):
    """Wipe every document from each Solr instance attached to this task."""
    connection_strings = list(
        self.task.solr_instances.all().values_list('str_connection', flat=True))
    for str_connection in connection_strings:
        client = solr.SolrConnection(str_connection)
        client.delete_query("*:*")
        client.commit()
def update_search_index(self, request, queryset):
    """Admin action: reindex the selected products on the Solr instance
    chosen in the extra-action form. Does nothing if the form is invalid
    or no instance was selected."""
    from .search_indexes import ProductIndex
    form = ExtraActionForm(request.POST)
    if not (form.is_valid() and form.cleaned_data.get('solr_instances')):
        return
    client = solr.SolrConnection(form.cleaned_data['solr_instances'])
    index = ProductIndex(client)
    index.reindex_by_qset(queryset, queryset.filter(availability=True))
    client.close()
def search(request, category, source, search_value, sort="", page=""):
    """Search tweets in the CZ4034 Solr core, build spelling suggestions,
    sort the hits, and render the results page.

    :param source: "All2" searches all sources; otherwise filters by name.
    :param sort: "Popularity" (likes), "Retweet", anything else sorts by time.
    """
    search_value = search_value.replace("%20", " ")
    suggestion_list = ""
    connection = solr.SolrConnection('http://localhost:8983/solr/CZ4034',
                                     debug=True)
    if source == "All2":
        status_list = connection.query('content:"' + search_value + '"',
                                       rows=100).results
    else:
        status_list = connection.query('content:"' + search_value + '"' +
                                       ' AND name:' + source).results

    # Fetch spelling suggestions. Bug fix: the urlopen handle was never
    # closed (resource leak).
    conn = urlopen('http://localhost:8983/solr/CZ4034/suggest?q=' +
                   search_value.replace(" ", "%20") + '&wt=json')
    try:
        suggestion_json = json.load(conn)["spellcheck"]["suggestions"]
    finally:
        conn.close()
    if len(suggestion_json) > 1:
        suggestion_list = suggestion_json[1]["suggestion"]
        original_word = suggestion_json[0]
        # Build full query strings with each suggested replacement word.
        suggestion_list = [
            search_value.replace(original_word, suggestion)
            for suggestion in suggestion_list
        ]

    # Solr returns retweet_count as a multivalued field; unwrap it.
    for status in status_list:
        status["retweet_count"] = status["retweet_count"][0]

    if sort == "Popularity":
        status_list = sorted(status_list, key=lambda s: s["like"],
                             reverse=True)
    elif sort == "Retweet":
        status_list = sorted(status_list, key=lambda s: s["retweet_count"],
                             reverse=True)
    else:
        status_list = sorted(status_list, key=lambda s: s["time"],
                             reverse=True)

    request.session.status_list = status_list
    pages = getPage(request)
    request.session.status_list = getStatusList(status_list, page)
    return render(
        request, 'home2.html', {
            'sort': sort,
            'status_list': request.session.status_list,
            'search_value': search_value,
            'source': source,
            'category': category,
            'suggestion_list': suggestion_list,
            'pages': pages
        })
def order_solr_suggest(q):
    """Return raw term suggestions for prefix *q* from the order index's
    /terms handler (response body as returned by Solr)."""
    conn = solr.SolrConnection(settings.ORDER_SOLR, **{'operation': '/terms'})
    params = {
        'terms_fl': 'suggest',
        'wt': 'json',
        'omitHeaders': 'true',
        'terms_prefix': q,
    }
    return conn.raw_query(**params)
def order_solr_search(q, fields=None, highlight=None, score=True, sort=None,
                      sort_order='asc', operation='/select', **kw):
    """Query the order Solr index.

    Returns the Solr response, or None when Solr raises an error
    (errors are deliberately not logged for now).
    """
    conn = solr.SolrConnection(settings.ORDER_SOLR)
    try:
        return conn.query(q, fields, highlight, score, sort, sort_order, **kw)
    except solr.SolrException:
        return None
def solr_tags(fields, q='*:*'):
    """Facet the product index on *fields* and return a dict mapping each
    facet value to its count.

    Solr returns each facet as a flat ``[value, count, value, count, ...]``
    list; all fields' lists are flattened together then paired up.
    """
    conn = solr.SolrConnection(settings.PRODUCT_SOLR)
    raw = conn.raw_query(q=q, wt='json', facet='true', facet_field=fields)
    facet_fields = simplejson.loads(raw)['facet_counts']['facet_fields']
    flat = []
    for values in facet_fields.values():
        flat.extend(values)
    return dict(zip(flat[::2], flat[1::2]))
def getBlog(self, filter):
    """Decode *filter* into Solr query parameters and return the ``link``
    field of every matching blog post."""
    connection = solr.SolrConnection('http://localhost:8983/solr')
    print(filter)
    parsed_filter = SearchFilter()
    parsed_filter.decode(filter)
    print(parsed_filter.toSolr())
    response = connection.query(**parsed_filter.toSolr())
    connection.close()
    return [hit['link'] for hit in response.results]
def execute(self):
    """Apply queued deletions then additions to Solr, committing once."""
    connection = solr.SolrConnection(self.solr_server_url)
    for item in self._deletions:
        item.delete_from_solr(connection)
    records = [item.create_solr_record() for item in self._additions]
    connection.add_many(records)
    connection.commit()
def main(argv):
    # Parse CLI args and publish generated API docs to a Solr instance.
    # Options (all required): -i/--inputpath, -s/--solraddr, -b/--buildid.
    #solrUrl = 'http://127.0.0.1:8080/solr/apidocs' # The URL of the solr instance
    #path = "D:/Git/wrdoclet/wrdoclet/target/doc/" # The directory of files to publish to solr
    solrUrl = ''
    path = ''
    buildID = ''
    try:
        # NOTE(review): second name is misspelled ("agrs") but never used.
        opts, agrs = getopt.getopt(argv, "hi:s:b:",
                                   ["inputpath=", "solraddr=", "buildid="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            usage()
            sys.exit()
        elif opt in ("-i", "--inputpath"):
            path = arg
        elif opt in ("-s", "--solraddr"):
            solrUrl = arg
        elif opt in ("-b", "--buildid"):
            buildID = arg
    if solrUrl == '' or path == '' or buildID == '':
        # All three options are mandatory; bail out with usage text.
        usage()
        sys.exit()
    print "input path of docs: ", path
    print "solr address url: ", solrUrl
    print "doc build ID: ", buildID
    solrInstance = solr.SolrConnection(solrUrl)  # Solr Connection object
    publishedCount = 0
    skippedCount = 0
    processedSet = set()
    try:
        pCount, sCount, pSet = publishToSolr(solrInstance, path, buildID)
        publishedCount += pCount
        skippedCount += sCount
        processedSet |= pSet
        #just for migration of old type users, official wrdoclet user will not hit this code path.
        pathlist = [
            join(path, f) for f in listdir(path)
            if isdir(join(path, f)) and f.startswith('detail')
        ]
        for p in pathlist:
            pCount, sCount, pSet = publishToSolr(solrInstance, p, buildID)
            publishedCount += pCount
            skippedCount += sCount
            processedSet |= pSet
    except PublishException, ex:
        # Fold counts accumulated before the failure into the summary,
        # then propagate the error to the caller.
        printSummary(publishedCount + ex.publishedCount,
                     skippedCount + ex.skippedCount)
        raise
def __init__(self, url):
    """Initialize the wrapper with the search url.

    Args:
        url (string) The Solr URL for the searchparty collection
    """
    self.url = url
    # Live solrpy connection to the collection.
    self.conn = solr.SolrConnection(url)
    # Per-field weight map, populated by setWeigtage() below.
    self.weightage = {}
    # Default weights: first field 10, the remaining thirteen -1.
    self.setWeigtage([10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])
def data2():
    """Render a fixed demo query ('india', 10 rows) against the linksdgs
    collection as JSON.

    NOTE(review): the ``row`` and ``query`` request parameters are read but
    never used — confirm whether this endpoint should honour them.
    """
    row = request.args.get("row")
    query = request.args.get("query")
    collection = 'linksdgs'
    server_url = 'http://solr4-jmmnn-1.c9users.io/solr/' + str(collection)
    connection = solr.SolrConnection(server_url)
    response = connection.select('india', rows=10)
    return render_template("data2.json", response=response)
def query_journal(self, query, search_type):
    """Dispatch *query* to the journal-search function registered for
    *search_type* and return the processed results.

    Returns None when the search type (or an expected key downstream)
    is missing (KeyError path).
    """
    connection = solr.SolrConnection(self.solr_url)
    try:
        handler = self.journal_func_map[search_type.name]
        response = handler(self, connection, query, self.journal_facet_params)
        return self.process_journal_results(response, search_type)
    except KeyError:
        return None
def solr_delete(sender, instance, **kwargs):
    """Django signal handler: delete the Solr record for *instance* if one
    is indexed.

    Bug fix: the log line's format string was missing its ``{0}``
    placeholder, so the record id was never printed.
    NOTE(review): no commit is issued here — confirm a later commit
    flushes the delete.
    """
    from django.conf import settings
    import solr
    solrconn = solr.SolrConnection(settings.SOLR_SERVER)
    record = solrconn.query("id:{0}".format(instance.id))
    if record:
        # the record already exists, so we'll remove it first.
        print("Deleting {0}".format(record.results[0]['id']))
        solrconn.delete(record.results[0]['id'])
def test_solr_deletion(self):
    """Deleting a manuscript from Solr leaves no indexed records behind."""
    manuscript_pk = self.first_manuscript.pk
    connection = solr.SolrConnection(settings.SOLR_SERVER)
    self.first_manuscript.delete_from_solr(connection)
    connection.commit()
    remaining = connection.query(
        'type:cantusdata_manuscript AND item_id:{}'.format(manuscript_pk))
    self.assertEqual(remaining.numFound, 0)
def SolrOptimize(indexing):
    """Run a Solr optimize on every staged index named in *indexing*,
    then on the combined 'all' index."""
    print(" - Optimize Solr data")
    for name in list(indexing) + ['all']:
        connection = solr.SolrConnection(solrconfig.solr_urls_stage[name])
        connection.optimize()
        connection.close()
        print(" - " + name + " optimized.")
def brewers(request, brewer_name):
    """Render search results for all beers from the named brewery."""
    connection = solr.SolrConnection('http://localhost:8983/solr')
    hits = connection.query('brewery_name:"%s"' % brewer_name)
    context = {
        'searchresults': hits,
        'title': 'Beers from',
        'em': brewer_name,
        'form': SearchForm(),
    }
    return render_to_response('searchresults.html', context)