from montysolr import config, optionparse import sys import gdata.spreadsheet.service log = config.get_logger('montysolr.gd_add_row') def run(user=None, password=None, spreadsheet=None, keys=[], data=[], verbose=None, sep=None): ''' Utility to add values into a Google SpreadSheet usage: %prog -u montysolr -p <password> -s SpreadSheetName -k date,docs -d 12/13/14,5000 -p, --password = PASS: password to access the Google Data -u, --user = USER: user name -s, --spreadsheet = FILE: spread sheet name (you must have created the spreadsheet manually!) -k, --keys = KEYS: comma separated names from the header in the table (you must have created it manually!) -d, --data = DATA: comma separated list of values to insert, each row is then split using row-separator -e, --sep = SEP: character to use as a value separator for the data fields, default [|] -v, --verbose = VERBOSE: numeric value of the logging module [30] ''' log.setLevel(int(verbose)) vals = [] for v in data: vv = v.split(sep) if len(vv) != len(keys):
import sys import time import logging import urllib2 as u2 import httplib import simplejson import urllib from montysolr import config log = config.get_logger("montysolr.examples.adsabs.run_dump") def req(url, **kwargs): kwargs['wt'] = 'json' params = urllib.urlencode(kwargs) page = '' try: conn = urllib.urlopen(url, params) page = conn.read() rsp = simplejson.loads(page) conn.close() return rsp except Exception, e: log.error(str(e)) log.error(page) raise e def run_dump(solr_url,
import sys import time import logging import urllib2 as u2 import httplib import simplejson import urllib from montysolr import config log = config.get_logger("montysolr.examples.adsabs.reindex") def req(url, **kwargs): kwargs["wt"] = "json" params = urllib.urlencode(kwargs) page = "" try: conn = urllib.urlopen(url, params) page = conn.read() rsp = simplejson.loads(page) conn.close() return rsp except Exception, e: log.error(str(e)) log.error(page) raise e def run_dump(solr_url, wait=10, max_wait=3600):
from montysolr import config, optionparse from lxml import etree import sys log = config.get_logger('montysolr.extract_values') def run(xpath, input=None, sep=None, verbose=None): ''' Utility to extract text values from the xml usage: %prog [options] -i, --input = I: file input -e, --sep = SEP: character to use as a value separator for the data fields, default [|] -v, --verbose = VERBOSE: numeric value of the logging module [30] ''' log.setLevel(int(verbose)) root = etree.parse(input) tree = root.getroot() for xp in xpath: xp = xp.replace('\\n', '\n').replace('\\t', '\t')
from montysolr import config, optionparse from lxml import etree import sys log = config.get_logger('montysolr.extract_values') def run(xpath, input=None, sep=None, verbose=None): ''' Utility to extract text values from the xml usage: %prog [options] -i, --input = I: file input -e, --sep = SEP: character to use as a value separator for the data fields, default [|] -v, --verbose = VERBOSE: numeric value of the logging module [30] ''' log.setLevel(int(verbose)) root = etree.parse(input) tree = root.getroot() for xp in xpath: xp = xp.replace('\\n', '\n').replace('\\t', '\t') log.info(xp) elems = tree.xpath(xp) if len(elems) < 1: log.error("Nothing found in %s for xpath %s" % (input, xp))
import sys import time import logging import urllib2 as u2 import httplib import simplejson import urllib from montysolr import config log = config.get_logger("montysolr.examples.adsabs.run_dump") def req(url, **kwargs): kwargs["wt"] = "json" params = urllib.urlencode(kwargs) page = "" try: conn = urllib.urlopen(url, params) page = conn.read() rsp = simplejson.loads(page) conn.close() return rsp except Exception, e: log.error(str(e)) log.error(page) raise e def run_dump(solr_url, source_field="author", target_field="author_collector", max_wait=3600):
import sys import os from montysolr import config from monty_examples.utils import req log = config.get_logger("montysolr.examples.measure_qtime") def run(solr_url, query, repetitions=1): repetitions = int(repetitions) if (os.path.exists(query)): queries = load_queries(query) log.info("Loaded %s queries from: %s" % (len(queries), query)) else: queries = [query] results = {} for i in (range(repetitions)): log.info("Starting iteration: #%s" % i) for q in queries: log.info("%s" % q) rsp = req(solr_url, q=q, rows=0) if (not rsp['responseHeader'].has_key('status') or rsp['responseHeader']['status'] != 0): log.error("Error searching: %s" % str(rsp)) continue
from montysolr import config, optionparse import sys import gdata.spreadsheet.service log = config.get_logger('montysolr.gd_add_row') def run(user=None, password=None, spreadsheet=None, keys=[], data=[], verbose=None, sep=None): ''' Utility to add values into a Google SpreadSheet usage: %prog -u montysolr -p <password> -s SpreadSheetName -k date,docs -d 12/13/14,5000 -p, --password = PASS: password to access the Google Data -u, --user = USER: user name -s, --spreadsheet = FILE: spread sheet name (you must have created the spreadsheet manually!) -k, --keys = KEYS: comma separated names from the header in the table (you must have created it manually!) -d, --data = DATA: comma separated list of values to insert, each row is then split using row-separator -e, --sep = SEP: character to use as a value separator for the data fields, default [|] -v, --verbose = VERBOSE: numeric value of the logging module [30] ''' log.setLevel(int(verbose)) vals = []
import sys import time import pprint from montysolr import config from monty_examples.utils import req log = config.get_logger("montysolr.examples.adsabs.recreate_index") def recreate_index(solr_url, max_time=3600, delay=5, handler_name='/invenio/update', maximport=500, batchsize=2000, startfrom=-1, inveniourl='python://search', importurl='/invenio/import?command=full-import&dirs=', updateurl='/invenio/import?command=full-import&dirs=', deleteurl='blankrecords', doctor_handler='/invenio-doctor' ): up_url = solr_url + handler_name doctor_url = solr_url + doctor_handler delay = int(delay)
from montysolr import config, optionparse import sys import gdata.spreadsheet.service log = config.get_logger("montysolr.gd_add_row") def run(user=None, password=None, spreadsheet=None, keys=[], data=[], verbose=None, sep=None): """ Utility to add values into a Google SpreadSheet usage: %prog -u montysolr -p <password> -s SpreadSheetName -k date,docs -d 12/13/14,5000 -p, --password = PASS: password to access the Google Data -u, --user = USER: user name -s, --spreadsheet = FILE: spread sheet name (you must have created the spreadsheet manually!) -k, --keys = KEYS: comma separated names from the header in the table (you must have created it manually!) -d, --data = DATA: comma separated list of values to insert, each row is then split using row-separator -e, --sep = SEP: character to use as a value separator for the data fields, default [|] -v, --verbose = VERBOSE: numeric value of the logging module [30] """ log.setLevel(int(verbose)) vals = [] for v in data: vv = v.split(sep) if len(vv) != len(keys): log.error("The data is not of the same size as header!") log.error("header=%s, data=%s" % (keys, v)) raise (Exception("Wrong input"))